/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
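
// Note: each bit in the map covers (1 << _shifter) heap words, which is
// why the backing store above only needs
// (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1 bytes; the store is
// committed eagerly, so the whole map is usable as soon as the
// constructor returns.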

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}
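
// getAndClearMarkedRegion() returns the first contiguous run of marked
// bits in [addr, end_addr) as a MemRegion and clears it; callers get an
// empty region back once no marked words remain below end_addr.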
MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}


CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(MemRegion, _base);
  }
}
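
// The lock-free push below claims a slot by advancing _index with a CAS
// and only then stores the region into the claimed slot; pop_lock_free()
// mirrors this, and skips entries whose start is NULL, which is how
// invalidate_entries_into_cset() marks entries as stale.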
void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}
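
// drain() below pops grey objects off the mark stack and applies the
// closure to the oops they contain until the stack is empty; when
// yield_after is set, it also offers to yield after each object and
// returns false if the drain was interrupted (true if it ran to
// completion).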
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, (size_t)1);
}
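
// The scaling above gives roughly a quarter of the parallel GC threads,
// never fewer than one: e.g., 4 parallel GC threads map to 1 marking
// thread, 8 map to 2, and 13 map to 3.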

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
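
    // Example (assumed flag values, for illustration only): with
    // G1MarkingOverheadPercent=10, MaxGCPauseMillis=200 and
    // GCPauseIntervalMillis=1000 on an 8-CPU machine, overall_cm_overhead
    // is 200 * 0.10 / 1000 = 0.02 (2% of total CPU time), cpu_ratio is
    // 0.125, so one marking thread is used with a task overhead of 0.16
    // and a sleep factor of (1.0 - 0.16) / 0.16 = 5.25, i.e. the thread
    // sleeps 5.25x as long as it just ran.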

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         (int) _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}
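
// set_phase() records how many tasks will be active in the upcoming
// phase and resizes the parallel task terminator and the two overflow
// barriers to match; concurrent marking and remark typically run with
// different numbers of threads, so this is done at the start of every
// phase.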
void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//
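
// clearNextBitmap() below runs on the concurrent mark thread, so the
// bitmap is cleared in fixed-size chunks (M heap words at a time) with a
// yield check between chunks to keep the thread responsive to safepoint
// requests.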
void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT
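
// ForceOverflowSettings (debug builds only) artificially triggers a
// marking stack overflow for the first G1ConcMarkForceOverflow marking
// phases of a cycle, so that the overflow / restart machinery above gets
// exercised regularly in testing.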

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr)) {
    _nextMarkBitMap->parMark(addr);
  }
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects) {
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;
  }

  HeapWord* finger = _finger;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);
  }

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low()) {
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());
    }

    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low()) {
        gclog_or_tty->print_cr("[global] region stack has overflown.");
      }
    }
  }
}
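
// markAndGrayObjectIfNecessary() marks the object in the next bitmap
// and, if the object lies below the global finger (i.e. in the part of
// the heap the marking tasks have already claimed), also pushes it on
// the global mark stack so that it still gets scanned; objects above
// the finger will be visited when the finger reaches them.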
void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective whether we bail out
    // because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects) {
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;
      }

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low()) {
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
          }
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};
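
// Throttling note: when a task's marking step aborts (typically because
// it used up its G1ConcMarkStepDurationMillis time slice) while marking
// as a whole has not, the worker above sleeps for
// elapsed_vtime * sleep_factor before the next step; this is how the
// overhead target computed in the constructor is enforced.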

// Calculates the number of active workers for a concurrent
// phase.
size_t ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    size_t n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}
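
// checkpointRootsFinal() implements the remark pause: it runs at a
// safepoint, finishes off the remaining marking work, processes weak
// references, and then either declares marking complete or, if the
// global mark stack overflowed, flags a restart of concurrent marking.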
void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* allow dirty */ true,
                       /* silent      */ false,
                       /* option      */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0
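
// CalcLiveObjectsClosure walks the marked objects in each region to
// compute per-region live bytes and to populate the region liveness and
// card liveness bitmaps. It is used both concurrently (with yielding,
// from calcDesiredRegions()) and, in "final" mode, with the world
// stopped (from G1ParFinalCountTask).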
class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm),_final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) break;
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }
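
  // doHeapRegion() below accumulates runs of consecutive live cards
  // (start_card_num .. last_card_num) as it walks the marked objects of
  // a region, and flushes a run to the card bitmap via
  // mark_card_num_range() whenever the next live object starts more
  // than one card past the end of the current run.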
  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0) {
      _start_vtime_sec = os::elapsedVTime();
    }

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded. It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set. We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1) {
      mark_card_num_range(start_card_num, last_card_num);
    }
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset() { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};


void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}
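
// G1ParFinalCountTask runs the final (stop-the-world) liveness count in
// parallel: each worker applies CalcLiveObjectsClosure to a chunk of the
// region sequence and records its live/used byte totals in a per-worker
// slot, and the totals are summed afterwards via live_bytes() and
// used_bytes().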
class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"), _g1h(g1h),
      _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
      _n_workers(0)
  {
    // Use the value already set as the number of active threads
    // in the call to run_task(). Needed for the allocation of
    // _live_bytes and _used_bytes.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
              "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            (int) _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;
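
// G1NoteEndOfConcMarkClosure visits every region at the end of marking:
// it records per-region marking information and hands regions that
// turned out to be completely free to a worker-local cleanup list;
// G1ParNoteEndTask then splices those local lists into the global
// cleanup list under a lock.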
class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  OldRegionSet* _old_proxy_set;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             OldRegionSet* old_proxy_set,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _old_proxy_set(old_proxy_set),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *hr) {
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
    hr->reset_gc_time_stamp();
    if (!hr->continuesHumongous()) {
      double start = os::elapsedTime();
      _regions_claimed++;
      hr->note_end_of_marking();
      _max_live_bytes += hr->max_live_bytes();
      _g1->free_region_if_empty(hr,
                                &_freed_bytes,
                                _local_cleanup_list,
                                _old_proxy_set,
                                _humongous_proxy_set,
                                _hrrs_cleanup_task,
                                true /* par */);
      double region_time = (os::elapsedTime() - start);
      _claimed_region_time += region_time;
      if (region_time > _max_region_time) {
        _max_region_time = region_time;
      }
    }
    return false;
  }

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("  Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  HRSPhaseSetter x(HRSPhaseCleanup);
  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  size_t n_workers;

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                        &_region_bm, &_card_bm);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(
                                               HeapRegion::InitialClaimValue),
           "sanity check");

    g1h->set_par_threads();
    n_workers = g1h->n_par_threads();
    assert(g1h->n_par_threads() == (int) n_workers,
           "Should not have been reset");
    g1h->workers()->run_task(&g1_par_count_task);
    // Done with the parallel phase so reset to 0.
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(
                                               HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    n_workers = 1;
    g1_par_count_task.work(0);
  }

  size_t known_garbage_bytes =
    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  g1p->set_known_garbage_bytes(known_garbage_bytes);

  size_t start_used_bytes = g1h->used();
  _at_least_one_mark_complete = true;
  g1h->set_marking_complete();

  ergo_verbose4(ErgoConcCycles,
           "finish cleanup",
           ergo_format_byte("occupancy")
           ergo_format_byte("capacity")
           ergo_format_byte_perc("known garbage"),
           start_used_bytes, g1h->capacity(),
           known_garbage_bytes,
           ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("Cleanup:");
    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
                           this_final_counting_time*1000.0);
  }
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  double note_end_start = os::elapsedTime();
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    g1h->set_par_threads((int)n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }
  double note_end_end = os::elapsedTime();
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
                           (note_end_end - note_end_start)*1000.0);
  }

  // The remembered sets must be scrubbed before the
  // record_concurrent_mark_cleanup_end() call below, since scrubbing
  // affects the metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                                 HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // G1CollectedHeap::heap()->print();
  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  //                        G1CollectedHeap::heap()->get_gc_time_stamp());

  if (PrintGC || PrintGCDetails) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  g1h->verify_region_sets_optional();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has "SIZE_FORMAT" entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
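
    // To make the cadence concrete (an illustrative example, not taken
    // from the code): with G1SecondaryFreeListAppendLength == 5 and 12
    // regions on the cleanup list, the append below fires after the 5th
    // and 10th regions have been moved into tmp_free_list, and once more
    // for the final two regions when _cleanup_list becomes empty. That is
    // three SecondaryFreeList_lock acquisitions and notify_all() calls
    // instead of twelve.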
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending "SIZE_FORMAT" entries to the "
                               "secondary_free_list, clean list still has "
                               SIZE_FORMAT" entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Support closures for reference processing in G1

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

class G1CMKeepAliveClosure: public OopClosure {
  G1CollectedHeap* _g1;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitMap;
 public:
  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
                       CMBitMap* bitMap) :
    _g1(g1), _cm(cm),
    _bitMap(bitMap) {}

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    HeapWord* addr = (HeapWord*)obj;

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("\t[0] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
                             p, (void*) obj);
    }

    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
      _bitMap->mark(addr);
      _cm->mark_stack_push(obj);
    }
  }
};

class G1CMDrainMarkingStackClosure: public VoidClosure {
  CMMarkStack*          _markStack;
  CMBitMap*             _bitMap;
  G1CMKeepAliveClosure* _oopClosure;
 public:
  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
                               G1CMKeepAliveClosure* oopClosure) :
    _bitMap(bitMap),
    _markStack(markStack),
    _oopClosure(oopClosure)
  {}

  void do_void() {
    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  }
};

// 'Keep Alive' closure used by parallel reference processing.
// An instance of this closure is used in the parallel reference processing
// code rather than an instance of G1CMKeepAliveClosure. We could have used
// the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
// placed on the discovered ref lists at most once, so we can mark and push
// without needing to check whether the object has already been marked. Using
// the G1CMKeepAliveClosure would mean, however, having all the worker threads
// operating on the global mark stack. This means that an individual
// worker would be doing lock-free pushes while it processes its own
// discovered ref list followed by drain call. If the discovered ref lists
// are unbalanced then this could cause interference with the other
// workers. Using a CMTask (and its embedded local data structures)
// avoids that potential interference.
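
// For orientation, a minimal sketch of how a ReferenceProcessor drives the
// is-alive/keep-alive/drain triple above (a hand-written approximation of
// the protocol, not the actual ReferenceProcessor code; referent_addr() is
// an illustrative stand-in for the real Reference accessor):
//
//   for each discovered Reference r:
//     if (!is_alive.do_object_b(referent(r))) {
//       keep_alive.do_oop(referent_addr(r));  // mark the referent, push it
//     }
//   drain.do_void();  // trace everything the keep-alive pushes exposed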
class G1CMParKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark*  _cm;
  CMTask*          _task;
  int              _ref_counter_limit;
  int              _ref_counter;
 public:
  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->task_id(), p, (void*) obj);
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call do_marking_step() to process
        // these entries. We call the routine in a loop, which we'll exit
        // if there's nothing more to do (i.e. we're done with the entries
        // that we've pushed as a result of the deal_with_reference calls
        // above) or we overflow.
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment
        // at the beginning of CMTask::do_marking_step() for those
        // conditions - one of which is reaching the specified time target.)
        // It is only when CMTask::do_marking_step() returns without setting
        // the has_aborted() flag that the marking has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_stealing    */,
                                 false /* do_termination */);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
      }
    }
  }
};

class G1CMParDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask*         _task;
 public:
  G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task)
  {}

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
                               _task->task_id());
      }

      // We call CMTask::do_marking_step() to completely drain the local
      // and global marking stacks. The routine is called in a loop, which
      // we'll exit if there's nothing more to do (i.e. we've completely
      // drained the entries that were pushed as a result of applying the
      // G1CMParKeepAliveAndDrainClosure to the entries on the discovered
      // ref lists above) or we overflow the global marking stack.
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment
      // at the beginning of CMTask::do_marking_step() for those
      // conditions - one of which is reaching the specified time target.)
      // It is only when CMTask::do_marking_step() returns without setting
      // the has_aborted() flag that the marking has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_stealing    */,
                             true /* do_termination */);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  WorkGang*        _workers;
  int              _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&     _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) { }

  virtual void work(int i) {
    CMTask* marking_task = _cm->task(i);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
    G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);

    _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the phase for each task execution so that
  // the termination protocol of CMTask::do_marking_step works.
  _cm->set_phase(_active_workers, false /* concurrent */);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(int i) {
    _enq_task.work(i);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark   hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    bool verbose = PrintGC && PrintGCDetails;
    if (verbose) {
      gclog_or_tty->put(' ');
    }
    TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Process weak references.
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
    G1CMDrainMarkingStackClosure
      g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);

    // We use the work gang from the G1CollectedHeap and we utilize all
    // the worker threads.
    int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
    active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);

    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);

    if (rp->processing_is_mt()) {
      // Set the degree of MT here. If the discovery is done MT, there
      // may have been a different number of threads doing the discovery
      // and a different number of discovered lists may have Ref objects.
      // That is OK as long as the Reference lists are balanced (see
      // balance_all_queues() and balance_queues()).
      rp->set_active_mt_degree(active_workers);

      rp->process_discovered_references(&g1_is_alive,
                                        &g1_keep_alive,
                                        &g1_drain_mark_stack,
                                        &par_task_executor);

      // The work routines of the parallel keep_alive and drain_marking_stack
      // will set the has_overflown flag if we overflow the global marking
      // stack.
    } else {
      rp->process_discovered_references(&g1_is_alive,
                                        &g1_keep_alive,
                                        &g1_drain_mark_stack,
                                        NULL);
    }

    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");
    if (_markStack.overflow()) {
      // Should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    if (rp->processing_is_mt()) {
      assert(rp->num_q() == active_workers, "why not");
      rp->enqueue_discovered_references(&par_task_executor);
    } else {
      rp->enqueue_discovered_references();
    }

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  // Now clean up stale oops in StringTable
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in symbol table.
  SymbolTable::unlink();
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark *_cm;

public:
  void work(int worker_i) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if ((size_t)worker_i < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_i);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_stealing    */,
                              true /* do_termination */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, int active_workers) :
    AbstractGangTask("Par Remark"), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    int active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its value originally
    // calculated in the ConcurrentMark constructor and pass values
    // of the active workers through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers);
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = 1;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}

#ifndef PRODUCT

class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption  vo,
                           bool          all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = false;
      bool marked = false;

      switch (_vo) {
        case VerifyOption_G1UsePrevMarking:
          over_tams = hr->obj_allocated_since_prev_marking(obj);
          marked = _g1h->isMarkedPrev(obj);
          break;
        case VerifyOption_G1UseNextMarking:
          over_tams = hr->obj_allocated_since_next_marking(obj);
          marked = _g1h->isMarkedNext(obj);
          break;
        case VerifyOption_G1UseMarkWord:
          marked = obj->is_gc_marked();
          break;
        default:
          ShouldNotReachHere();
      }

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

class PrintReachableObjectClosure : public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;
  HeapRegion*      _hr;

public:
  PrintReachableObjectClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all,
                              HeapRegion*   hr) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams = false;
    bool marked = false;

    switch (_vo) {
      case VerifyOption_G1UsePrevMarking:
        over_tams = _hr->obj_allocated_since_prev_marking(o);
        marked = _g1h->isMarkedPrev(o);
        break;
      case VerifyOption_G1UseNextMarking:
        over_tams = _hr->obj_allocated_since_next_marking(o);
        marked = _g1h->isMarkedNext(o);
        break;
      case VerifyOption_G1UseMarkWord:
        marked = o->is_gc_marked();
        break;
      default:
        ShouldNotReachHere();
    }
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ? " M" : "");
      PrintReachableOopClosure oopCl(_out, _vo, _all);
      o->oop_iterate(&oopCl);
    }
  }
};

class PrintReachableRegionClosure : public HeapRegionClosure {
private:
  outputStream* _out;
  VerifyOption  _vo;
  bool          _all;

public:
  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* b = hr->bottom();
    HeapWord* e = hr->end();
    HeapWord* t = hr->top();
    HeapWord* p = NULL;

    switch (_vo) {
      case VerifyOption_G1UsePrevMarking:
        p = hr->prev_top_at_mark_start();
        break;
      case VerifyOption_G1UseNextMarking:
        p = hr->next_top_at_mark_start();
        break;
      case VerifyOption_G1UseMarkWord:
        // When we are verifying marking using the mark word
        // TAMS has no relevance.
        assert(p == NULL, "post-condition");
        break;
      default:
        ShouldNotReachHere();
    }
    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
                   "TAMS: "PTR_FORMAT, b, e, t, p);
    _out->cr();

    HeapWord* from = b;
    HeapWord* to   = t;

    if (to > from) {
      _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
      _out->cr();
      PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
      hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
      _out->cr();
    }

    return false;
  }

  PrintReachableRegionClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all) :
    _out(out), _vo(vo), _all(all) { }
};

static const char* verify_option_to_tams(VerifyOption vo) {
  switch (vo) {
    case VerifyOption_G1UsePrevMarking:
      return "PTAMS";
    case VerifyOption_G1UseNextMarking:
      return "NTAMS";
    default:
      return "NONE";
  }
}

void ConcurrentMark::print_reachable(const char* str,
                                     VerifyOption vo,
                                     bool all) {
  gclog_or_tty->cr();
  gclog_or_tty->print_cr("== Doing heap dump... ");

  if (G1PrintReachableBaseFile == NULL) {
    gclog_or_tty->print_cr("  #### error: no base file defined");
    return;
  }

  if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
      (JVM_MAXPATHLEN - 1)) {
    gclog_or_tty->print_cr("  #### error: file name too long");
    return;
  }

  char file_name[JVM_MAXPATHLEN];
  sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  gclog_or_tty->print_cr("  dumping to file %s", file_name);

  fileStream fout(file_name);
  if (!fout.is_open()) {
    gclog_or_tty->print_cr("  #### error: could not open file");
    return;
  }

  outputStream* out = &fout;
  out->print_cr("-- USING %s", verify_option_to_tams(vo));
  out->cr();

  out->print_cr("--- ITERATING OVER REGIONS");
  out->cr();
  PrintReachableRegionClosure rcl(out, vo, all);
  _g1h->heap_region_iterate(&rcl);
  out->cr();

  gclog_or_tty->print_cr("  done");
  gclog_or_tty->flush();
}

#endif // PRODUCT

// This note is for drainAllSATBBuffers and the code in between.
// In the future we could reuse a task to do this work during an
// evacuation pause (since now tasks are not active and can be claimed
// during an evacuation pause). This was a late change to the code and
// is currently not being taken advantage of.
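
// For context: the SATB buffers drained by drainAllSATBBuffers() below are
// filled by the mutators' snapshot-at-the-beginning pre-write barrier. A
// rough sketch of that barrier's effect (the real barrier is emitted by the
// interpreter and compilers; the names used here are illustrative only):
//
//   // before a mutator performs obj->field = new_value:
//   oop pre_val = obj->field;
//   if (marking_is_active && pre_val != NULL) {
//     current_thread_satb_queue.enqueue(pre_val);  // log the old value so
//                                                  // marking still visits it
//   }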

class CMGlobalObjectClosure : public ObjectClosure {
private:
  ConcurrentMark* _cm;

public:
  void do_object(oop obj) {
    _cm->deal_with_reference(obj);
  }

  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
};

void ConcurrentMark::deal_with_reference(oop obj) {
  if (verbose_high()) {
    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
                           (void*) obj);
  }

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "null check is implicit");
    if (!_nextMarkBitMap->isMarked(objAddr)) {
      // Only get the containing region if the object is not marked on the
      // bitmap (otherwise, it's a waste of time since we won't do
      // anything with it).
      HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
      if (!hr->obj_allocated_since_next_marking(obj)) {
        if (verbose_high()) {
          gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
                                 "marked", (void*) obj);
        }

        // we need to mark it first
        if (_nextMarkBitMap->parMark(objAddr)) {
          // No OrderAccess::store_load() is needed. It is implicit in the
          // CAS done in parMark(objAddr) above
          HeapWord* finger = _finger;
          if (objAddr < finger) {
            if (verbose_high()) {
              gclog_or_tty->print_cr("[global] below the global finger "
                                     "("PTR_FORMAT"), pushing it", finger);
            }
            if (!mark_stack_push(obj)) {
              if (verbose_low()) {
                gclog_or_tty->print_cr("[global] global stack overflow during "
                                       "deal_with_reference");
              }
            }
          }
        }
      }
    }
  }
}

void ConcurrentMark::drainAllSATBBuffers() {
  CMGlobalObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_closure(&oc);

  while (satb_mq_set.apply_closure_to_completed_buffer()) {
    if (verbose_medium()) {
      gclog_or_tty->print_cr("[global] processed an SATB buffer");
    }
  }

  // no need to check whether we should do this, as this is only
  // called during an evacuation pause
  satb_mq_set.iterate_closure_all_threads();

  satb_mq_set.set_closure(NULL);
  assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
}

void ConcurrentMark::markPrev(oop p) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
}

void ConcurrentMark::clear(oop p) {
  assert(p != NULL && p->is_oop(), "expected an oop");
  HeapWord* addr = (HeapWord*)p;
  assert(addr >= _nextMarkBitMap->startWord() &&
         addr < _nextMarkBitMap->endWord(), "in a region");

  _nextMarkBitMap->clear(addr);
}

void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  _nextMarkBitMap->clearRange(mr);
}

HeapRegion*
ConcurrentMark::claim_region(int task_num) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
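
  // The loop below is a lock-free claiming protocol over the global
  // finger. A stripped-down sketch of the pattern (region_containing()
  // is an illustrative stand-in for heap_region_containing_raw()):
  //
  //   HeapWord* finger = _finger;                // snapshot the finger
  //   while (finger < _heap_end) {
  //     HeapRegion* r   = region_containing(finger);
  //     HeapWord*   end = r->end();
  //     // try to swing the finger forward to the end of r
  //     if ((HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger) == finger) {
  //       return r;                              // [finger, end) is ours
  //     }
  //     finger = _finger;                        // lost the race; retry
  //   }
  //   return NULL;                               // ran off the heap end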
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // Note on how this code handles humongous regions. In the
    // normal case the finger will reach the start of a "starts
    // humongous" (SH) region. Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord*   bottom      = curr_region->bottom();
    HeapWord*   end         = curr_region->end();
    HeapWord*   limit       = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             task_num, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%d] we were successful with region = "
                               PTR_FORMAT, task_num, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
                                 "returning it ", task_num, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                 "returning NULL", task_num, curr_region);
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low()) {
        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               task_num, _finger, finger);
      }

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
  bool result = false;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = _g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        the_task->set_aborted_region(MemRegion());
        result = true;
      }
    }
  }
  return result;
}

bool ConcurrentMark::has_aborted_regions() {
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return true;
    }
  }
  return false;
}

void ConcurrentMark::oops_do(OopClosure* cl) {
  if (_markStack.size() > 0 && verbose_low()) {
    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
                           "size = %d", _markStack.size());
  }
  // we first iterate over the contents of the mark stack...
  _markStack.oops_do(cl);

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue((int)i);

    if (queue->size() > 0 && verbose_low()) {
      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
                             "size = %d", i, queue->size());
    }

    // ...then over the contents of all the task queues.
    queue->oops_do(cl);
  }

  // Invalidate any entries in the region stack that point into the
  // collection set.
  if (_regionStack.invalidate_entries_into_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }

  // Invalidate any aborted regions, recorded in the individual CM
  // tasks, that point into the collection set.
  if (invalidate_aborted_regions_in_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }

}

void ConcurrentMark::clear_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  _regionStack.setEmpty();
  _regionStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
    // Clear any partial regions from the CMTasks
    _tasks[i]->clear_aborted_region();
  }
}

void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

// Closures used by ConcurrentMark::complete_marking_in_collection_set().

class CSetMarkOopClosure: public OopClosure {
  friend class CSetMarkBitMapClosure;

  G1CollectedHeap* _g1h;
  CMBitMap*        _bm;
  ConcurrentMark*  _cm;
  oop*             _ms;
  jint*            _array_ind_stack;
  int              _ms_size;
  int              _ms_ind;
  int              _array_increment;
  int              _worker_i;

  bool push(oop obj, int arr_ind = 0) {
    if (_ms_ind == _ms_size) {
      gclog_or_tty->print_cr("Mark stack is full.");
      return false;
    }
    _ms[_ms_ind] = obj;
    if (obj->is_objArray()) {
      _array_ind_stack[_ms_ind] = arr_ind;
    }
    _ms_ind++;
    return true;
  }

  oop pop() {
    if (_ms_ind == 0) {
      return NULL;
    } else {
      _ms_ind--;
      return _ms[_ms_ind];
    }
  }

  template <class T> bool drain() {
    while (_ms_ind > 0) {
      oop obj = pop();
      assert(obj != NULL, "Since index was non-zero.");
      if (obj->is_objArray()) {
        jint arr_ind = _array_ind_stack[_ms_ind];
        objArrayOop aobj = objArrayOop(obj);
        jint len = aobj->length();
        jint next_arr_ind = arr_ind + _array_increment;
        if (next_arr_ind < len) {
          push(obj, next_arr_ind);
        }
        // Now process this portion of this one.
        int lim = MIN2(next_arr_ind, len);
        for (int j = arr_ind; j < lim; j++) {
          do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
        }
      } else {
        obj->oop_iterate(this);
      }
      if (abort()) return false;
    }
    return true;
  }

public:
  CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
    _g1h(G1CollectedHeap::heap()),
    _cm(cm),
    _bm(cm->nextMarkBitMap()),
    _ms_size(ms_size), _ms_ind(0),
    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
    _array_increment(MAX2(ms_size/8, 16)),
    _worker_i(worker_i) { }

  ~CSetMarkOopClosure() {
    FREE_C_HEAP_ARRAY(oop, _ms);
    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (oopDesc::is_null(heap_oop)) return;
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (obj->is_forwarded()) {
      // If the object has already been forwarded, we have to make sure
      // that it's marked.
      // So follow the forwarding pointer. Note that this does the right
      // thing for self-forwarding pointers in the evacuation failure case.
      obj = obj->forwardee();
    }
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (hr != NULL) {
      if (hr->in_collection_set()) {
        if (_g1h->is_obj_ill(obj)) {
          if (_bm->parMark((HeapWord*)obj)) {
            if (!push(obj)) {
              gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
              set_abort();
            }
          }
        }
      } else {
        // Outside the collection set; we need to gray it
        _cm->deal_with_reference(obj);
      }
    }
  }
};

class CSetMarkBitMapClosure: public BitMapClosure {
  G1CollectedHeap*   _g1h;
  CMBitMap*          _bitMap;
  ConcurrentMark*    _cm;
  CSetMarkOopClosure _oop_cl;
  int                _worker_i;

public:
  CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
    _g1h(G1CollectedHeap::heap()),
    _bitMap(cm->nextMarkBitMap()),
    _oop_cl(cm, ms_size, worker_i),
    _worker_i(worker_i) { }

  bool do_bit(size_t offset) {
    // convert offset into a HeapWord*
    HeapWord* addr = _bitMap->offsetToHeapWord(offset);
    assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
           "address out of range");
    assert(_bitMap->isMarked(addr), "tautology");
    oop obj = oop(addr);
    if (!obj->is_forwarded()) {
      if (!_oop_cl.push(obj)) return false;
      if (UseCompressedOops) {
        if (!_oop_cl.drain<narrowOop>()) return false;
      } else {
        if (!_oop_cl.drain<oop>()) return false;
      }
    }
    // Otherwise...
    return true;
  }
};

class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
  CMBitMap* _bm;
  CSetMarkBitMapClosure _bit_cl;
  int _worker_i;

  enum SomePrivateConstants {
    MSSize = 1000
  };

public:
  CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
    _bm(cm->nextMarkBitMap()),
    _bit_cl(cm, MSSize, worker_i),
    _worker_i(worker_i) { }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
      // The current worker has successfully claimed the region.
      if (!hr->evacuation_failed()) {
        MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
        if (!mr.is_empty()) {
          bool done = false;
          while (!done) {
            done = _bm->iterate(&_bit_cl, mr);
          }
        }
      }
    }
    return false;
  }
};

class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
  jint _claim_value;

public:
  SetClaimValuesInCSetHRClosure(jint claim_value) :
    _claim_value(claim_value) { }

  bool doHeapRegion(HeapRegion* hr) {
    hr->set_claim_value(_claim_value);
    return false;
  }
};

class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;

public:
  G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
                              ConcurrentMark* cm) :
    AbstractGangTask("Complete Mark in CSet"),
    _g1h(g1h), _cm(cm) { }

  void work(int worker_i) {
    CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
    _g1h->collection_set_iterate_from(hr, &cmplt);
  }
};

void ConcurrentMark::complete_marking_in_collection_set() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  if (!g1h->mark_in_progress()) {
    g1h->g1_policy()->record_mark_closure_time(0.0);
    return;
  }

  double start = os::elapsedTime();
  G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);

  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    int n_workers = g1h->workers()->active_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&complete_mark_task);
    g1h->set_par_threads(0);
  } else {
    complete_mark_task.work(0);
  }

  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");

  // Now reset the claim values in the regions in the collection set.
  SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
  g1h->collection_set_iterate(&set_cv_cl);

  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");

  double end_time = os::elapsedTime();
  double elapsed_time_ms = (end_time - start) * 1000.0;
  g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
}

// The next two methods deal with the following optimisation. Some
// objects are gray by being marked and located above the finger. If
// they are copied, during an evacuation pause, below the finger then
// they need to be pushed on the stack. The observation is that, if
// there are no regions in the collection set located above the
// finger, then the above cannot happen, hence we do not need to
// explicitly gray any objects when copying them to below the
// finger. The global stack will be scanned to ensure that, if it
// points to objects being copied, it will update their
// location. There is a tricky situation with the gray objects in
// the region stack that are being copied, however. See the comment
// in newCSet().
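
// A concrete (hypothetical) illustration of the invariant above: suppose
// the global finger is at address 0x1000 and a marked (gray) object at
// 0x2000 is evacuated to 0x0800 during a pause. Marking will never walk
// back over 0x0800, so unless the object is explicitly grayed (pushed on
// a mark stack) when it is copied, its references would never be scanned.
// If no collection set region lies above the lowest finger, no marked
// object can move from above the finger to below it, and the explicit
// graying can be skipped.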

void ConcurrentMark::newCSet() {
  if (!concurrent_marking_in_progress()) {
    // nothing to do if marking is not in progress
    return;
  }

  // find what the lowest finger is among the global and local fingers
  _min_finger = _finger;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _min_finger) {
      _min_finger = task_finger;
    }
  }

  _should_gray_objects = false;

  // This fixes a very subtle and frustrating bug. It might be the case
  // that, during an evacuation pause, heap regions that contain
  // objects that are gray (by being in regions contained in the
  // region stack) are included in the collection set. Since such gray
  // objects will be moved, and because it's not easy to redirect
  // region stack entries to point to a new location (because objects
  // in one region might be scattered to multiple regions after they
  // are copied), one option is to ensure that all marked objects
  // copied during a pause are pushed on the stack. Notice, however,
  // that this problem can only happen when the region stack is not
  // empty during an evacuation pause. So, we make the fix a bit less
  // conservative and ensure that regions are pushed on the stack,
  // irrespective of whether all collection set regions are below the
  // finger, if the region stack is not empty. This is expected to be
  // a rare case, so I don't think it's necessary to be smarter about it.
  if (!region_stack_empty() || has_aborted_regions()) {
    _should_gray_objects = true;
  }
}

void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  if (!concurrent_marking_in_progress()) return;

  HeapWord* region_end = hr->end();
  if (region_end > _min_finger) {
    _should_gray_objects = true;
  }
}

// Resets the region fields of active CMTasks whose values point
// into the collection set.
void ConcurrentMark::reset_active_task_region_fields_in_cset() {
  assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
  assert(parallel_marking_threads() <= _max_task_num, "sanity");

  for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL) {
      assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
      HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
      if (finger_region->in_collection_set()) {
        // The task's current region is in the collection set.
        // This region will be evacuated in the current GC and
        // the region fields in the task will be stale.
        task->giveup_current_region();
      }
    }
  }
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Empty mark stack
  clear_marking_state();
  for (int i = 0; i < (int)_max_task_num; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);
}

static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s  [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                           : 0.0));
  }
  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
                         "(%8.2f s marking, %8.2f s counting).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum(),
                         cmThread()->vtime_count_accum());
}

void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}

// Closures
// XXX: there seems to be a lot of code duplication here;
// should refactor and consolidate the shared code.

// The closures below are used to mark refs into the G1 heap in the
// marking bit map. Called at the first checkpoint.

// We take a break if someone is trying to stop the world.
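
// Workers are expected to call do_yield_check() periodically from their
// marking loops; a minimal usage sketch (the loop body is hypothetical):
//
//   while (there_is_marking_work()) {
//     do_a_bounded_amount_of_marking();
//     if (_cm->do_yield_check(worker_i)) {
//       // We yielded for a safepoint; values that only change at yield
//       // points (e.g. the heap end) may need to be re-read here.
//     }
//   }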
bool ConcurrentMark::do_yield_check(int worker_i) {
  if (should_yield()) {
    if (worker_i == 0) {
      _g1h->g1_policy()->record_concurrent_pause();
    }
    cmThread()->yield();
    if (worker_i == 0) {
      _g1h->g1_policy()->record_concurrent_pause_end();
    }
    return true;
  } else {
    return false;
  }
}

bool ConcurrentMark::should_yield() {
  return cmThread()->should_yield();
}

bool ConcurrentMark::containing_card_is_marked(void* p) {
  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
}

bool ConcurrentMark::containing_cards_are_marked(void* start,
                                                 void* last) {
  return containing_card_is_marked(start) &&
         containing_card_is_marked(last);
}

#ifndef PRODUCT
// for debugging purposes
void ConcurrentMark::print_finger() {
  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
                         _heap_start, _heap_end, _finger);
  for (int i = 0; i < (int) _max_task_num; ++i) {
    gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  }
  gclog_or_tty->print_cr("");
}
#endif

void CMTask::scan_object(oop obj) {
  assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");

  if (_cm->verbose_high()) {
    gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
                           _task_id, (void*) obj);
  }

  size_t obj_size = obj->size();
  _words_scanned += obj_size;

  obj->oop_iterate(_cm_oop_closure);
  statsOnly( ++_objs_scanned );
  check_limits();
}

// Closure for iteration over bitmaps
class CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  CMBitMap*       _nextMarkBitMap;
  ConcurrentMark* _cm;
  CMTask*         _task;
  // true if we're scanning a heap region claimed by the task (so that
  // we move the finger along), false if we're not, i.e. currently when
  // scanning a heap region popped from the region stack (so that we
  // do not move the task finger along; it'd be a mistake if we did so).
  bool            _scanning_heap_region;

public:
  CMBitMapClosure(CMTask *task,
                  ConcurrentMark* cm,
                  CMBitMap* nextMarkBitMap)
    :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  void set_scanning_heap_region(bool scanning_heap_region) {
    _scanning_heap_region = scanning_heap_region;
  }

  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert( addr < _cm->finger(), "invariant");

    if (_scanning_heap_region) {
      statsOnly( _task->increase_objs_found_on_bitmap() );
      assert(addr >= _task->finger(), "invariant");
      // We move that task's local finger along.
      _task->move_finger_to(addr);
    } else {
      // We move the task's region finger along.
      _task->move_region_finger_to(addr);
    }

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

// Closure for iterating over objects, currently only used for
// processing SATB buffers.
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}

void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
         "claim_region() should have filtered out continues humongous regions");
  assert(!hr->continuesHumongous(),
         "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
                           _task_id, hr);
  }

  _curr_region = hr;
  _finger      = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr   = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit  = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _task_id, bottom, limit);
    }
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and in fact we do not need to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
  }
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region   = NULL;
  _finger        = NULL;
  _region_limit  = NULL;

  _region_finger = NULL;
}

void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] resetting", _task_id);
  }

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();
  assert(_aborted_region.is_empty(), "should have been cleared");

  _calls                     = 0;
  _elapsed_time_ms           = 0.0;
  _termination_time_ms       = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _local_pushes           = 0;
  _local_pops             = 0;
  _local_max_size         = 0;
  _objs_scanned           = 0;
  _global_pushes          = 0;
  _global_pops            = 0;
  _global_max_size        = 0;
  _global_transfers_to    = 0;
  _global_transfers_from  = 0;
  _region_stack_pops      = 0;
  _regions_claimed        = 0;
  _objs_found_on_bitmap   = 0;
  _satb_buffers_processed = 0;
  _steal_attempts         = 0;
  _steals                 = 0;
  _aborted                = 0;
  _aborted_overflow       = 0;
  _aborted_cm_aborted     = 0;
  _aborted_yield          = 0;
  _aborted_timed_out      = 0;
  _aborted_satb           = 0;
  _aborted_termination    = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    }
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.
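  //
  // (Illustrative arithmetic, derived from recalculate_limits()
  // above) right after a recalculation, _real_words_scanned_limit
  // equals _words_scanned + words_scanned_period; subtracting
  // 3 * words_scanned_period / 4 below therefore leaves a budget of
  // only words_scanned_period / 4. E.g. if words_scanned_period were
  // 12*1024 words, the next clock call would come after at most
  // 3*1024 further words scanned.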

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
                               _task_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
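  //
  // (Illustrative, assuming the default value of 64 for the
  // GCDrainStackTargetSize flag and a queue capacity of, say, 3000
  // entries) a partial drain stops once the queue is down to
  // MIN2(3000 / 3, 64) == 64 entries, deliberately leaving some
  // entries behind for other tasks to steal.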
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] draining local queue, target size = "SIZE_FORMAT,
                             _task_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks, or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining global_stack, target size "SIZE_FORMAT,
                             _task_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
                             _task_id, _cm->mark_stack_size());
    }
  }
}

// The SATB queue set makes several assumptions about whether to call
// the par or non-par versions of its methods. This is why some of the
// code is replicated. We should really get rid of the single-threaded
// version of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

void CMTask::drain_region_stack(BitMapClosure* bc) {
  if (has_aborted()) return;

  assert(_region_finger == NULL,
         "it should be NULL when we're not scanning a region");

  if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
                             _task_id, _cm->region_stack_size());
    }

    MemRegion mr;

    if (!_aborted_region.is_empty()) {
      mr = _aborted_region;
      _aborted_region = MemRegion();

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] scanning aborted region "
                               "[ " PTR_FORMAT ", " PTR_FORMAT " )",
                               _task_id, mr.start(), mr.end());
      }
    } else {
      mr = _cm->region_stack_pop_lock_free();
      // it returns MemRegion() if the pop fails
      statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
    }

    while (mr.start() != NULL) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] we are scanning region "
                               "["PTR_FORMAT", "PTR_FORMAT")",
                               _task_id, mr.start(), mr.end());
      }

      assert(mr.end() <= _cm->finger(),
             "otherwise the region shouldn't be on the stack");
      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
      if (_nextMarkBitMap->iterate(bc, mr)) {
        assert(!has_aborted(),
               "cannot abort the task without aborting the bitmap iteration");

        // We finished iterating over the region without aborting.
        regular_clock_call();
        if (has_aborted()) {
          mr = MemRegion();
        } else {
          mr = _cm->region_stack_pop_lock_free();
          // it returns MemRegion() if the pop fails
          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
        }
      } else {
        assert(has_aborted(), "currently the only way to do so");

        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _region_finger to point to the
        // object currently being looked at.
        // So, if we bail out, we have definitely set _region_finger
        // to something non-null.
        assert(_region_finger != NULL, "invariant");

        // Make sure that any previously aborted region has been
        // cleared.
        assert(_aborted_region.is_empty(), "aborted region not cleared");

        // The iteration was actually aborted. So now _region_finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        MemRegion newRegion =
          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());

        if (!newRegion.is_empty()) {
          if (_cm->verbose_low()) {
            gclog_or_tty->print_cr("[%d] recording unscanned region "
                                   "[" PTR_FORMAT ", " PTR_FORMAT ") in CMTask",
                                   _task_id,
                                   newRegion.start(), newRegion.end());
          }
          // Now record the part of the region we didn't scan to
          // make sure this task scans it later.
          _aborted_region = newRegion;
        }
        // break from while
        mr = MemRegion();
      }
      _region_finger = NULL;
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
                             _task_id, _cm->region_stack_size());
    }
  }
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
                         _regions_claimed, _region_stack_pops);
  gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms) method is the building block
    of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before it
    needs to yield.

    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it needs to be also
      pushed on a stack).

      (2) Local Queue. The local queue of the task which is accessed
      reasonably efficiently by the task. Other tasks can steal from
      it when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work, a task will totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking, only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and finer-grained
      interaction with it might cause contention. If it
      overflows, then the marking phase should restart and iterate
      over the bitmap to identify gray objects. Throughout the marking
      phase, tasks attempt to keep the global mark stack at a small
      length but not totally empty, so that entries are available for
      popping by other tasks. Only when there is no more work, tasks
      will totally drain the global mark stack.

      (4) Global Region Stack. Entries on it correspond to areas of
      the bitmap that need to be scanned since they contain gray
      objects. Pushes on the region stack only happen during
      evacuation pauses and typically correspond to areas covered by
      GC LABs. If it overflows, then the marking phase should restart
      and iterate over the bitmap to identify gray objects. Tasks will
      try to totally drain the region stack as soon as possible.

      (5) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long.
      During remark, all completed buffers are processed, as
      well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (either on the global stack or the
      region stack) has been triggered. Before the task aborts, it
      will actually sync up with the other tasks to ensure that all
      the marking data structures (local queues, stacks, fingers etc.)
      are re-initialised so that when do_marking_step() completes,
      the marking phase can immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. When we have to yield, we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding, we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-millisecond intervals) throughout marking. It is this clock
    method that checks all the abort conditions which were mentioned
    above and decides when the task should abort. A work-based scheme
    is used to trigger this clock method: it is called when the number
    of object words the marking phase has scanned or the number of
    references the marking phase has visited reaches a given limit.
    Additional invocations of the clock method have been planted in a
    few other strategic places too. The initial reason for the clock
    method was to avoid calling vtime too regularly, as it is quite
    expensive. So, once it was in place, it was natural to piggy-back
    all the other conditions on it too and not constantly check them
    throughout the code.
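
    As an illustrative sketch (not part of the original comment; the
    10ms target and the loop shape are assumptions for the example),
    a marking worker would therefore drive a task roughly as follows:

      do {
        task->do_marking_step(10.0,   // time_target_ms
                              true,   // do_stealing
                              true);  // do_termination
        // between steps: yield, or join the overflow sync barriers,
        // depending on why the step aborted
      } while (task->has_aborted() && !cm->has_aborted());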

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_stealing,
                             bool do_termination) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  assert(concurrent() || _cm->region_stack_empty(),
         "the region stack should have been cleared before remark");
  assert(concurrent() || !_cm->has_aborted_regions(),
         "aborted regions should have been cleared before remark");
  assert(_region_finger == NULL,
         "this should be non-null only when a region is being scanned");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the region stack or the mark stack overflows
    // during a GC pause and this task, after a yield point,
    // restarts. We have to abort as we need to get into the overflow
    // protocol which happens right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Then totally drain the region stack. We will not look at
  // it again before the next invocation of this method. Entries on
  // the region stack are only added during evacuation pauses, for
  // which we have to yield. When we do, we abort the task anyway so
  // it will look at the region stack again when it restarts.
  bitmap_closure.set_scanning_heap_region(false);
  drain_region_stack(&bitmap_closure);
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);
      }

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      bitmap_closure.set_scanning_heap_region(true);
      if (mr.is_empty() ||
          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_aborted_region.is_empty(), "only way to reach here");
      guarantee(_cm->region_stack_empty(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
      guarantee(!_cm->region_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task. The
      // caller will restart it and we can hopefully find more things to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] apparently there is more work to do",
                               _task_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
      }

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _aborted_region(MemRegion()) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT     " %-4s"
#define G1PPRL_TYPE_H_FORMAT   " %4s"
#define G1PPRL_BYTE_FORMAT     " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT   " %9s"
#define G1PPRL_DOUBLE_FORMAT   " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved  = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
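  //
  // (Illustrative note, not in the original sources) the macros above
  // compose via adjacent string literal concatenation; e.g.
  // G1PPRL_SUM_MB_PERC_FORMAT("used") expands to the single format
  // string " used: %1.2f MB / %1.2f %%", which is why each such tag in
  // the summary line (see the destructor below) is matched by a
  // megabyte/percentage pair of arguments.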
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
}

// It takes as a parameter a reference to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = "";
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
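    //
    // (Illustrative, with assumed numbers) for a humongous series of
    // three regions with GrainBytes == 1M and 2.5M used in total, the
    // "starts humongous" region below reports 1M, and the two
    // "continues humongous" regions that follow report 1M and 0.5M,
    // draining the _hum_* fields back to zero as the series is walked.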
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes));
  _out->cr();
}
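
// (Illustrative usage sketch, not part of the original sources; the
// phase name is an arbitrary example.) The closure is applied to every
// region and prints the summary when it goes out of scope:
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);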