1 /* 2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/symbolTable.hpp" 27 #include "gc_implementation/g1/concurrentMark.hpp" 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp" 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp" 31 #include "gc_implementation/g1/g1RemSet.hpp" 32 #include "gc_implementation/g1/heapRegionRemSet.hpp" 33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp" 34 #include "gc_implementation/shared/vmGCOperations.hpp" 35 #include "memory/genOopClosures.inline.hpp" 36 #include "memory/referencePolicy.hpp" 37 #include "memory/resourceArea.hpp" 38 #include "oops/oop.inline.hpp" 39 #include "runtime/handles.inline.hpp" 40 #include "runtime/java.hpp" 41 42 // 43 // CMS Bit Map Wrapper 44 45 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter): 46 _bm((uintptr_t*)NULL,0), 47 _shifter(shifter) { 48 _bmStartWord = (HeapWord*)(rs.base()); 49 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes 50 ReservedSpace brs(ReservedSpace::allocation_align_size_up( 51 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); 52 53 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map"); 54 // For now we'll just commit all of the bit map up fromt. 55 // Later on we'll try to be more parsimonious with swap. 56 guarantee(_virtual_space.initialize(brs, brs.size()), 57 "couldn't reseve backing store for CMS bit map"); 58 assert(_virtual_space.committed_size() == brs.size(), 59 "didn't reserve backing store for all of CMS bit map?"); 60 _bm.set_map((uintptr_t*)_virtual_space.low()); 61 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= 62 _bmWordSize, "inconsistency in bit map sizing"); 63 _bm.set_size(_bmWordSize >> _shifter); 64 } 65 66 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr, 67 HeapWord* limit) const { 68 // First we must round addr *up* to a possible object boundary. 
69 addr = (HeapWord*)align_size_up((intptr_t)addr, 70 HeapWordSize << _shifter); 71 size_t addrOffset = heapWordToOffset(addr); 72 if (limit == NULL) limit = _bmStartWord + _bmWordSize; 73 size_t limitOffset = heapWordToOffset(limit); 74 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 75 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 76 assert(nextAddr >= addr, "get_next_one postcondition"); 77 assert(nextAddr == limit || isMarked(nextAddr), 78 "get_next_one postcondition"); 79 return nextAddr; 80 } 81 82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, 83 HeapWord* limit) const { 84 size_t addrOffset = heapWordToOffset(addr); 85 if (limit == NULL) limit = _bmStartWord + _bmWordSize; 86 size_t limitOffset = heapWordToOffset(limit); 87 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); 88 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 89 assert(nextAddr >= addr, "get_next_one postcondition"); 90 assert(nextAddr == limit || !isMarked(nextAddr), 91 "get_next_one postcondition"); 92 return nextAddr; 93 } 94 95 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { 96 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); 97 return (int) (diff >> _shifter); 98 } 99 100 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { 101 HeapWord* left = MAX2(_bmStartWord, mr.start()); 102 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end()); 103 if (right > left) { 104 // Right-open interval [leftOffset, rightOffset). 
105 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right)); 106 } else { 107 return true; 108 } 109 } 110 111 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap, 112 size_t from_start_index, 113 HeapWord* to_start_word, 114 size_t word_num) { 115 _bm.mostly_disjoint_range_union(from_bitmap, 116 from_start_index, 117 heapWordToOffset(to_start_word), 118 word_num); 119 } 120 121 #ifndef PRODUCT 122 bool CMBitMapRO::covers(ReservedSpace rs) const { 123 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 124 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize, 125 "size inconsistency"); 126 return _bmStartWord == (HeapWord*)(rs.base()) && 127 _bmWordSize == rs.size()>>LogHeapWordSize; 128 } 129 #endif 130 131 void CMBitMap::clearAll() { 132 _bm.clear(); 133 return; 134 } 135 136 void CMBitMap::markRange(MemRegion mr) { 137 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 138 assert(!mr.is_empty(), "unexpected empty region"); 139 assert((offsetToHeapWord(heapWordToOffset(mr.end())) == 140 ((HeapWord *) mr.end())), 141 "markRange memory region end is not card aligned"); 142 // convert address range into offset range 143 _bm.at_put_range(heapWordToOffset(mr.start()), 144 heapWordToOffset(mr.end()), true); 145 } 146 147 void CMBitMap::clearRange(MemRegion mr) { 148 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 149 assert(!mr.is_empty(), "unexpected empty region"); 150 // convert address range into offset range 151 _bm.at_put_range(heapWordToOffset(mr.start()), 152 heapWordToOffset(mr.end()), false); 153 } 154 155 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, 156 HeapWord* end_addr) { 157 HeapWord* start = getNextMarkedWordAddress(addr); 158 start = MIN2(start, end_addr); 159 HeapWord* end = getNextUnmarkedWordAddress(start); 160 end = MIN2(end, end_addr); 161 assert(start <= end, "Consistency check"); 162 MemRegion mr(start, end); 163 if (!mr.is_empty()) { 164 clearRange(mr); 165 } 
166 return mr; 167 } 168 169 CMMarkStack::CMMarkStack(ConcurrentMark* cm) : 170 _base(NULL), _cm(cm) 171 #ifdef ASSERT 172 , _drain_in_progress(false) 173 , _drain_in_progress_yields(false) 174 #endif 175 {} 176 177 void CMMarkStack::allocate(size_t size) { 178 _base = NEW_C_HEAP_ARRAY(oop, size); 179 if (_base == NULL) 180 vm_exit_during_initialization("Failed to allocate " 181 "CM region mark stack"); 182 _index = 0; 183 // QQQQ cast ... 184 _capacity = (jint) size; 185 _oops_do_bound = -1; 186 NOT_PRODUCT(_max_depth = 0); 187 } 188 189 CMMarkStack::~CMMarkStack() { 190 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); 191 } 192 193 void CMMarkStack::par_push(oop ptr) { 194 while (true) { 195 if (isFull()) { 196 _overflow = true; 197 return; 198 } 199 // Otherwise... 200 jint index = _index; 201 jint next_index = index+1; 202 jint res = Atomic::cmpxchg(next_index, &_index, index); 203 if (res == index) { 204 _base[index] = ptr; 205 // Note that we don't maintain this atomically. We could, but it 206 // doesn't seem necessary. 207 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 208 return; 209 } 210 // Otherwise, we need to try again. 211 } 212 } 213 214 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { 215 while (true) { 216 if (isFull()) { 217 _overflow = true; 218 return; 219 } 220 // Otherwise... 221 jint index = _index; 222 jint next_index = index + n; 223 if (next_index > _capacity) { 224 _overflow = true; 225 return; 226 } 227 jint res = Atomic::cmpxchg(next_index, &_index, index); 228 if (res == index) { 229 for (int i = 0; i < n; i++) { 230 int ind = index + i; 231 assert(ind < _capacity, "By overflow test above."); 232 _base[ind] = ptr_arr[i]; 233 } 234 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 235 return; 236 } 237 // Otherwise, we need to try again. 
238 } 239 } 240 241 242 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { 243 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 244 jint start = _index; 245 jint next_index = start + n; 246 if (next_index > _capacity) { 247 _overflow = true; 248 return; 249 } 250 // Otherwise. 251 _index = next_index; 252 for (int i = 0; i < n; i++) { 253 int ind = start + i; 254 assert(ind < _capacity, "By overflow test above."); 255 _base[ind] = ptr_arr[i]; 256 } 257 } 258 259 260 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { 261 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 262 jint index = _index; 263 if (index == 0) { 264 *n = 0; 265 return false; 266 } else { 267 int k = MIN2(max, index); 268 jint new_ind = index - k; 269 for (int j = 0; j < k; j++) { 270 ptr_arr[j] = _base[new_ind + j]; 271 } 272 _index = new_ind; 273 *n = k; 274 return true; 275 } 276 } 277 278 279 CMRegionStack::CMRegionStack() : _base(NULL) {} 280 281 void CMRegionStack::allocate(size_t size) { 282 _base = NEW_C_HEAP_ARRAY(MemRegion, size); 283 if (_base == NULL) 284 vm_exit_during_initialization("Failed to allocate " 285 "CM region mark stack"); 286 _index = 0; 287 // QQQQ cast ... 288 _capacity = (jint) size; 289 } 290 291 CMRegionStack::~CMRegionStack() { 292 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); 293 } 294 295 void CMRegionStack::push_lock_free(MemRegion mr) { 296 assert(mr.word_size() > 0, "Precondition"); 297 while (true) { 298 jint index = _index; 299 300 if (index >= _capacity) { 301 _overflow = true; 302 return; 303 } 304 // Otherwise... 305 jint next_index = index+1; 306 jint res = Atomic::cmpxchg(next_index, &_index, index); 307 if (res == index) { 308 _base[index] = mr; 309 return; 310 } 311 // Otherwise, we need to try again. 312 } 313 } 314 315 // Lock-free pop of the region stack. Called during the concurrent 316 // marking / remark phases. Should only be called in tandem with 317 // other lock-free pops. 
// Pops one region, skipping entries that were invalidated (stored as
// the empty MemRegion). Returns the empty MemRegion when the stack
// is exhausted.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      // Empty stack: signal with an empty region.
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

// Overwrites with the empty MemRegion every stacked entry (up to
// _oops_do_bound) whose region now lies in the collection set, so
// pops will skip them. Returns true if anything was invalidated.
bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

// Pops every entry off the stack and applies cl to it. If yield_after
// is set, offers to yield after each entry; returns false iff the
// drain was cut short by a yield, true when the stack was emptied.
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

// Applies f to the slots up to _oops_do_bound (set by the caller
// before a pause) and then resets the bound.
void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

// An object still needs marking if it is dead per the previous
// marking, or lives in perm gen and has no next-bitmap mark yet.
bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

// Sets up the two marking bitmaps, the mark/region stacks, the
// per-worker CMTasks and their queues, and sizes the concurrent
// marking thread pool (either from ConcGCThreads, from a
// G1MarkingOverheadPercent-derived target, or from a default based
// on ParallelGCThreads).
ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  // Clamp the requested verbosity into the supported range.
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  // NOTE(review): _heap_start is only assigned at the end of this
  // constructor and _heap_end in reset(); this print may read
  // uninitialized values when verbose logging is enabled -- verify.
  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set
      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      // Default: roughly a quarter of the parallel GC threads.
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf",
                           marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf",
                           cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                             (int) _parallel_marking_threads,
                                             false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, then we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink.
However, this only happens 636 // during a Full GC (at least currently) and the entire marking 637 // phase will bail out and the task will not be restarted. So, let's 638 // do nothing. 639 } 640 641 void ConcurrentMark::reset() { 642 // Starting values for these two. This should be called in a STW 643 // phase. CM will be notified of any future g1_committed expansions 644 // will be at the end of evacuation pauses, when tasks are 645 // inactive. 646 MemRegion committed = _g1h->g1_committed(); 647 _heap_start = committed.start(); 648 _heap_end = committed.end(); 649 650 // Separated the asserts so that we know which one fires. 651 assert(_heap_start != NULL, "heap bounds should look ok"); 652 assert(_heap_end != NULL, "heap bounds should look ok"); 653 assert(_heap_start < _heap_end, "heap bounds should look ok"); 654 655 // reset all the marking data structures and any necessary flags 656 clear_marking_state(); 657 658 if (verbose_low()) 659 gclog_or_tty->print_cr("[global] resetting"); 660 661 // We do reset all of them, since different phases will use 662 // different number of active threads. So, it's easiest to have all 663 // of them ready. 664 for (int i = 0; i < (int) _max_task_num; ++i) { 665 _tasks[i]->reset(_nextMarkBitMap); 666 } 667 668 // we need this to make sure that the flag is on during the evac 669 // pause with initial mark piggy-backed 670 set_concurrent_marking_in_progress(); 671 } 672 673 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) { 674 assert(active_tasks <= _max_task_num, "we should not have more"); 675 676 _active_tasks = active_tasks; 677 // Need to update the three data structures below according to the 678 // number of active threads for this phase. 
679 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 680 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 681 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 682 683 _concurrent = concurrent; 684 // We propagate this to all tasks, not just the active ones. 685 for (int i = 0; i < (int) _max_task_num; ++i) 686 _tasks[i]->set_concurrent(concurrent); 687 688 if (concurrent) { 689 set_concurrent_marking_in_progress(); 690 } else { 691 // We currently assume that the concurrent flag has been set to 692 // false before we start remark. At this point we should also be 693 // in a STW phase. 694 assert(!concurrent_marking_in_progress(), "invariant"); 695 assert(_finger == _heap_end, "only way to get here"); 696 update_g1_committed(true); 697 } 698 } 699 700 void ConcurrentMark::set_non_marking_state() { 701 // We set the global marking state to some default values when we're 702 // not doing marking. 703 clear_marking_state(); 704 _active_tasks = 0; 705 clear_concurrent_marking_in_progress(); 706 } 707 708 ConcurrentMark::~ConcurrentMark() { 709 for (int i = 0; i < (int) _max_task_num; ++i) { 710 delete _task_queues->queue(i); 711 delete _tasks[i]; 712 } 713 delete _task_queues; 714 FREE_C_HEAP_ARRAY(CMTask*, _max_task_num); 715 } 716 717 // This closure is used to mark refs into the g1 generation 718 // from external roots in the CMS bit map. 719 // Called at the first checkpoint. 720 // 721 722 void ConcurrentMark::clearNextBitmap() { 723 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 724 G1CollectorPolicy* g1p = g1h->g1_policy(); 725 726 // Make sure that the concurrent mark thread looks to still be in 727 // the current cycle. 728 guarantee(cmThread()->during_cycle(), "invariant"); 729 730 // We are finishing up the current cycle by clearing the next 731 // marking bitmap and getting it ready for the next cycle. During 732 // this time no other cycle can start. So, let's make sure that this 733 // is the case. 
734 guarantee(!g1h->mark_in_progress(), "invariant"); 735 736 // clear the mark bitmap (no grey objects to start with). 737 // We need to do this in chunks and offer to yield in between 738 // each chunk. 739 HeapWord* start = _nextMarkBitMap->startWord(); 740 HeapWord* end = _nextMarkBitMap->endWord(); 741 HeapWord* cur = start; 742 size_t chunkSize = M; 743 while (cur < end) { 744 HeapWord* next = cur + chunkSize; 745 if (next > end) 746 next = end; 747 MemRegion mr(cur,next); 748 _nextMarkBitMap->clearRange(mr); 749 cur = next; 750 do_yield_check(); 751 752 // Repeat the asserts from above. We'll do them as asserts here to 753 // minimize their overhead on the product. However, we'll have 754 // them as guarantees at the beginning / end of the bitmap 755 // clearing to get some checking in the product. 756 assert(cmThread()->during_cycle(), "invariant"); 757 assert(!g1h->mark_in_progress(), "invariant"); 758 } 759 760 // Repeat the asserts from above. 761 guarantee(cmThread()->during_cycle(), "invariant"); 762 guarantee(!g1h->mark_in_progress(), "invariant"); 763 } 764 765 class NoteStartOfMarkHRClosure: public HeapRegionClosure { 766 public: 767 bool doHeapRegion(HeapRegion* r) { 768 if (!r->continuesHumongous()) { 769 r->note_start_of_marking(true); 770 } 771 return false; 772 } 773 }; 774 775 void ConcurrentMark::checkpointRootsInitialPre() { 776 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 777 G1CollectorPolicy* g1p = g1h->g1_policy(); 778 779 _has_aborted = false; 780 781 #ifndef PRODUCT 782 if (G1PrintReachableAtInitialMark) { 783 print_reachable("at-cycle-start", 784 true /* use_prev_marking */, true /* all */); 785 } 786 #endif 787 788 // Initialise marking structures. This has to be done in a STW phase. 
789 reset(); 790 } 791 792 class CMMarkRootsClosure: public OopsInGenClosure { 793 private: 794 ConcurrentMark* _cm; 795 G1CollectedHeap* _g1h; 796 bool _do_barrier; 797 798 public: 799 CMMarkRootsClosure(ConcurrentMark* cm, 800 G1CollectedHeap* g1h, 801 bool do_barrier) : _cm(cm), _g1h(g1h), 802 _do_barrier(do_barrier) { } 803 804 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 805 virtual void do_oop( oop* p) { do_oop_work(p); } 806 807 template <class T> void do_oop_work(T* p) { 808 T heap_oop = oopDesc::load_heap_oop(p); 809 if (!oopDesc::is_null(heap_oop)) { 810 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); 811 assert(obj->is_oop() || obj->mark() == NULL, 812 "expected an oop, possibly with mark word displaced"); 813 HeapWord* addr = (HeapWord*)obj; 814 if (_g1h->is_in_g1_reserved(addr)) { 815 _cm->grayRoot(obj); 816 } 817 } 818 if (_do_barrier) { 819 assert(!_g1h->is_in_g1_reserved(p), 820 "Should be called on external roots"); 821 do_barrier(p); 822 } 823 } 824 }; 825 826 void ConcurrentMark::checkpointRootsInitialPost() { 827 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 828 829 // If we force an overflow during remark, the remark operation will 830 // actually abort and we'll restart concurrent marking. If we always 831 // force an oveflow during remark we'll never actually complete the 832 // marking phase. So, we initilize this here, at the start of the 833 // cycle, so that at the remaining overflow number will decrease at 834 // every remark and we'll eventually not need to cause one. 835 force_overflow_stw()->init(); 836 837 // For each region note start of marking. 838 NoteStartOfMarkHRClosure startcl; 839 g1h->heap_region_iterate(&startcl); 840 841 // Start weak-reference discovery. 
842 ReferenceProcessor* rp = g1h->ref_processor(); 843 rp->verify_no_references_recorded(); 844 rp->enable_discovery(); // enable ("weak") refs discovery 845 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 846 847 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 848 // This is the start of the marking cycle, we're expected all 849 // threads to have SATB queues with active set to false. 850 satb_mq_set.set_active_all_threads(true, /* new active value */ 851 false /* expected_active */); 852 853 // update_g1_committed() will be called at the end of an evac pause 854 // when marking is on. So, it's also called at the end of the 855 // initial-mark pause to update the heap end, if the heap expands 856 // during it. No need to call it here. 857 } 858 859 // Checkpoint the roots into this generation from outside 860 // this generation. [Note this initial checkpoint need only 861 // be approximate -- we'll do a catch up phase subsequently.] 862 void ConcurrentMark::checkpointRootsInitial() { 863 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); 864 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 865 866 double start = os::elapsedTime(); 867 868 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 869 g1p->record_concurrent_mark_init_start(); 870 checkpointRootsInitialPre(); 871 872 // YSR: when concurrent precleaning is in place, we'll 873 // need to clear the cached card table here 874 875 ResourceMark rm; 876 HandleMark hm; 877 878 g1h->ensure_parsability(false); 879 g1h->perm_gen()->save_marks(); 880 881 CMMarkRootsClosure notOlder(this, g1h, false); 882 CMMarkRootsClosure older(this, g1h, true); 883 884 g1h->set_marking_started(); 885 g1h->rem_set()->prepare_for_younger_refs_iterate(false); 886 887 g1h->process_strong_roots(true, // activate StrongRootsScope 888 false, // fake perm gen collection 889 SharedHeap::SO_AllClasses, 890 ¬Older, // Regular roots 891 NULL, // do not visit 
active blobs 892 &older // Perm Gen Roots 893 ); 894 checkpointRootsInitialPost(); 895 896 // Statistics. 897 double end = os::elapsedTime(); 898 _init_times.add((end - start) * 1000.0); 899 900 g1p->record_concurrent_mark_init_end(); 901 } 902 903 /* 904 * Notice that in the next two methods, we actually leave the STS 905 * during the barrier sync and join it immediately afterwards. If we 906 * do not do this, the following deadlock can occur: one thread could 907 * be in the barrier sync code, waiting for the other thread to also 908 * sync up, whereas another one could be trying to yield, while also 909 * waiting for the other threads to sync up too. 910 * 911 * Note, however, that this code is also used during remark and in 912 * this case we should not attempt to leave / enter the STS, otherwise 913 * we'll either hit an asseert (debug / fastdebug) or deadlock 914 * (product). So we should only leave / enter the STS if we are 915 * operating concurrently. 916 * 917 * Because the thread that does the sync barrier has left the STS, it 918 * is possible to be suspended for a Full GC or an evacuation pause 919 * could occur. This is actually safe, since the entering the sync 920 * barrier is one of the last things do_marking_step() does, and it 921 * doesn't manipulate any data structures afterwards. 
922 */ 923 924 void ConcurrentMark::enter_first_sync_barrier(int task_num) { 925 if (verbose_low()) 926 gclog_or_tty->print_cr("[%d] entering first barrier", task_num); 927 928 if (concurrent()) { 929 ConcurrentGCThread::stsLeave(); 930 } 931 _first_overflow_barrier_sync.enter(); 932 if (concurrent()) { 933 ConcurrentGCThread::stsJoin(); 934 } 935 // at this point everyone should have synced up and not be doing any 936 // more work 937 938 if (verbose_low()) 939 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num); 940 941 // let task 0 do this 942 if (task_num == 0) { 943 // task 0 is responsible for clearing the global data structures 944 // We should be here because of an overflow. During STW we should 945 // not clear the overflow flag since we rely on it being true when 946 // we exit this method to abort the pause and restart concurent 947 // marking. 948 clear_marking_state(concurrent() /* clear_overflow */); 949 force_overflow()->update(); 950 951 if (PrintGC) { 952 gclog_or_tty->date_stamp(PrintGCDateStamps); 953 gclog_or_tty->stamp(PrintGCTimeStamps); 954 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); 955 } 956 } 957 958 // after this, each task should reset its own data structures then 959 // then go into the second barrier 960 } 961 962 void ConcurrentMark::enter_second_sync_barrier(int task_num) { 963 if (verbose_low()) 964 gclog_or_tty->print_cr("[%d] entering second barrier", task_num); 965 966 if (concurrent()) { 967 ConcurrentGCThread::stsLeave(); 968 } 969 _second_overflow_barrier_sync.enter(); 970 if (concurrent()) { 971 ConcurrentGCThread::stsJoin(); 972 } 973 // at this point everything should be re-initialised and ready to go 974 975 if (verbose_low()) 976 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); 977 } 978 979 #ifndef PRODUCT 980 void ForceOverflowSettings::init() { 981 _num_remaining = G1ConcMarkForceOverflow; 982 _force = false; 983 update(); 984 } 985 986 void 
ForceOverflowSettings::update() { 987 if (_num_remaining > 0) { 988 _num_remaining -= 1; 989 _force = true; 990 } else { 991 _force = false; 992 } 993 } 994 995 bool ForceOverflowSettings::should_force() { 996 if (_force) { 997 _force = false; 998 return true; 999 } else { 1000 return false; 1001 } 1002 } 1003 #endif // !PRODUCT 1004 1005 void ConcurrentMark::grayRoot(oop p) { 1006 HeapWord* addr = (HeapWord*) p; 1007 // We can't really check against _heap_start and _heap_end, since it 1008 // is possible during an evacuation pause with piggy-backed 1009 // initial-mark that the committed space is expanded during the 1010 // pause without CM observing this change. So the assertions below 1011 // is a bit conservative; but better than nothing. 1012 assert(_g1h->g1_committed().contains(addr), 1013 "address should be within the heap bounds"); 1014 1015 if (!_nextMarkBitMap->isMarked(addr)) 1016 _nextMarkBitMap->parMark(addr); 1017 } 1018 1019 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) { 1020 // The objects on the region have already been marked "in bulk" by 1021 // the caller. We only need to decide whether to push the region on 1022 // the region stack or not. 1023 1024 if (!concurrent_marking_in_progress() || !_should_gray_objects) 1025 // We're done with marking and waiting for remark. We do not need to 1026 // push anything else on the region stack. 1027 return; 1028 1029 HeapWord* finger = _finger; 1030 1031 if (verbose_low()) 1032 gclog_or_tty->print_cr("[global] attempting to push " 1033 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at " 1034 PTR_FORMAT, mr.start(), mr.end(), finger); 1035 1036 if (mr.start() < finger) { 1037 // The finger is always heap region aligned and it is not possible 1038 // for mr to span heap regions. 1039 assert(mr.end() <= finger, "invariant"); 1040 1041 // Separated the asserts so that we know which one fires. 
1042 assert(mr.start() <= mr.end(), 1043 "region boundaries should fall within the committed space"); 1044 assert(_heap_start <= mr.start(), 1045 "region boundaries should fall within the committed space"); 1046 assert(mr.end() <= _heap_end, 1047 "region boundaries should fall within the committed space"); 1048 if (verbose_low()) 1049 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") " 1050 "below the finger, pushing it", 1051 mr.start(), mr.end()); 1052 1053 if (!region_stack_push_lock_free(mr)) { 1054 if (verbose_low()) 1055 gclog_or_tty->print_cr("[global] region stack has overflown."); 1056 } 1057 } 1058 } 1059 1060 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) { 1061 // The object is not marked by the caller. We need to at least mark 1062 // it and maybe push in on the stack. 1063 1064 HeapWord* addr = (HeapWord*)p; 1065 if (!_nextMarkBitMap->isMarked(addr)) { 1066 // We definitely need to mark it, irrespective whether we bail out 1067 // because we're done with marking. 1068 if (_nextMarkBitMap->parMark(addr)) { 1069 if (!concurrent_marking_in_progress() || !_should_gray_objects) 1070 // If we're done with concurrent marking and we're waiting for 1071 // remark, then we're not pushing anything on the stack. 1072 return; 1073 1074 // No OrderAccess:store_load() is needed. 
It is implicit in the 1075 // CAS done in parMark(addr) above 1076 HeapWord* finger = _finger; 1077 1078 if (addr < finger) { 1079 if (!mark_stack_push(oop(addr))) { 1080 if (verbose_low()) 1081 gclog_or_tty->print_cr("[global] global stack overflow " 1082 "during parMark"); 1083 } 1084 } 1085 } 1086 } 1087 } 1088 1089 class CMConcurrentMarkingTask: public AbstractGangTask { 1090 private: 1091 ConcurrentMark* _cm; 1092 ConcurrentMarkThread* _cmt; 1093 1094 public: 1095 void work(int worker_i) { 1096 assert(Thread::current()->is_ConcurrentGC_thread(), 1097 "this should only be done by a conc GC thread"); 1098 ResourceMark rm; 1099 1100 double start_vtime = os::elapsedVTime(); 1101 1102 ConcurrentGCThread::stsJoin(); 1103 1104 assert((size_t) worker_i < _cm->active_tasks(), "invariant"); 1105 CMTask* the_task = _cm->task(worker_i); 1106 the_task->record_start_time(); 1107 if (!_cm->has_aborted()) { 1108 do { 1109 double start_vtime_sec = os::elapsedVTime(); 1110 double start_time_sec = os::elapsedTime(); 1111 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1112 1113 the_task->do_marking_step(mark_step_duration_ms, 1114 true /* do_stealing */, 1115 true /* do_termination */); 1116 1117 double end_time_sec = os::elapsedTime(); 1118 double end_vtime_sec = os::elapsedVTime(); 1119 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 1120 double elapsed_time_sec = end_time_sec - start_time_sec; 1121 _cm->clear_has_overflown(); 1122 1123 bool ret = _cm->do_yield_check(worker_i); 1124 1125 jlong sleep_time_ms; 1126 if (!_cm->has_aborted() && the_task->has_aborted()) { 1127 sleep_time_ms = 1128 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 1129 ConcurrentGCThread::stsLeave(); 1130 os::sleep(Thread::current(), sleep_time_ms, false); 1131 ConcurrentGCThread::stsJoin(); 1132 } 1133 double end_time2_sec = os::elapsedTime(); 1134 double elapsed_time2_sec = end_time2_sec - start_time_sec; 1135 1136 #if 0 1137 gclog_or_tty->print_cr("CM: elapsed 
%1.4lf ms, sleep %1.4lf ms, " 1138 "overhead %1.4lf", 1139 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, 1140 the_task->conc_overhead(os::elapsedTime()) * 8.0); 1141 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", 1142 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); 1143 #endif 1144 } while (!_cm->has_aborted() && the_task->has_aborted()); 1145 } 1146 the_task->record_end_time(); 1147 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 1148 1149 ConcurrentGCThread::stsLeave(); 1150 1151 double end_vtime = os::elapsedVTime(); 1152 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime); 1153 } 1154 1155 CMConcurrentMarkingTask(ConcurrentMark* cm, 1156 ConcurrentMarkThread* cmt) : 1157 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 1158 1159 ~CMConcurrentMarkingTask() { } 1160 }; 1161 1162 void ConcurrentMark::markFromRoots() { 1163 // we might be tempted to assert that: 1164 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1165 // "inconsistent argument?"); 1166 // However that wouldn't be right, because it's possible that 1167 // a safepoint is indeed in progress as a younger generation 1168 // stop-the-world GC happens even as we mark in this generation. 
1169 1170 _restart_for_overflow = false; 1171 1172 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads()); 1173 force_overflow_conc()->init(); 1174 set_phase(active_workers, true /* concurrent */); 1175 1176 CMConcurrentMarkingTask markingTask(this, cmThread()); 1177 if (parallel_marking_threads() > 0) 1178 _parallel_workers->run_task(&markingTask); 1179 else 1180 markingTask.work(0); 1181 print_stats(); 1182 } 1183 1184 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1185 // world is stopped at this checkpoint 1186 assert(SafepointSynchronize::is_at_safepoint(), 1187 "world should be stopped"); 1188 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1189 1190 // If a full collection has happened, we shouldn't do this. 1191 if (has_aborted()) { 1192 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1193 return; 1194 } 1195 1196 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1197 1198 if (VerifyDuringGC) { 1199 HandleMark hm; // handle scope 1200 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1201 Universe::heap()->prepare_for_verify(); 1202 Universe::verify(true, false, true); 1203 } 1204 1205 G1CollectorPolicy* g1p = g1h->g1_policy(); 1206 g1p->record_concurrent_mark_remark_start(); 1207 1208 double start = os::elapsedTime(); 1209 1210 checkpointRootsFinalWork(); 1211 1212 double mark_work_end = os::elapsedTime(); 1213 1214 weakRefsWork(clear_all_soft_refs); 1215 1216 if (has_overflown()) { 1217 // Oops. We overflowed. Restart concurrent marking. 1218 _restart_for_overflow = true; 1219 // Clear the flag. We do not need it any more. 1220 clear_has_overflown(); 1221 if (G1TraceMarkStackOverflow) 1222 gclog_or_tty->print_cr("\nRemark led to restart for overflow."); 1223 } else { 1224 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1225 // We're done with marking. 1226 // This is the end of the marking cycle, we're expected all 1227 // threads to have SATB queues with active set to true. 
1228 satb_mq_set.set_active_all_threads(false, /* new active value */ 1229 true /* expected_active */); 1230 1231 if (VerifyDuringGC) { 1232 HandleMark hm; // handle scope 1233 gclog_or_tty->print(" VerifyDuringGC:(after)"); 1234 Universe::heap()->prepare_for_verify(); 1235 Universe::heap()->verify(/* allow_dirty */ true, 1236 /* silent */ false, 1237 /* use_prev_marking */ false); 1238 } 1239 assert(!restart_for_overflow(), "sanity"); 1240 } 1241 1242 // Reset the marking state if marking completed 1243 if (!restart_for_overflow()) { 1244 set_non_marking_state(); 1245 } 1246 1247 #if VERIFY_OBJS_PROCESSED 1248 _scan_obj_cl.objs_processed = 0; 1249 ThreadLocalObjQueue::objs_enqueued = 0; 1250 #endif 1251 1252 // Statistics 1253 double now = os::elapsedTime(); 1254 _remark_mark_times.add((mark_work_end - start) * 1000.0); 1255 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1256 _remark_times.add((now - start) * 1000.0); 1257 1258 g1p->record_concurrent_mark_remark_end(); 1259 } 1260 1261 #define CARD_BM_TEST_MODE 0 1262 1263 class CalcLiveObjectsClosure: public HeapRegionClosure { 1264 1265 CMBitMapRO* _bm; 1266 ConcurrentMark* _cm; 1267 bool _changed; 1268 bool _yield; 1269 size_t _words_done; 1270 size_t _tot_live; 1271 size_t _tot_used; 1272 size_t _regions_done; 1273 double _start_vtime_sec; 1274 1275 BitMap* _region_bm; 1276 BitMap* _card_bm; 1277 intptr_t _bottom_card_num; 1278 bool _final; 1279 1280 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { 1281 for (intptr_t i = start_card_num; i <= last_card_num; i++) { 1282 #if CARD_BM_TEST_MODE 1283 guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set."); 1284 #else 1285 _card_bm->par_at_put(i - _bottom_card_num, 1); 1286 #endif 1287 } 1288 } 1289 1290 public: 1291 CalcLiveObjectsClosure(bool final, 1292 CMBitMapRO *bm, ConcurrentMark *cm, 1293 BitMap* region_bm, BitMap* card_bm) : 1294 _bm(bm), _cm(cm), _changed(false), _yield(true), 1295 _words_done(0), 
_tot_live(0), _tot_used(0), 1296 _region_bm(region_bm), _card_bm(card_bm),_final(final), 1297 _regions_done(0), _start_vtime_sec(0.0) 1298 { 1299 _bottom_card_num = 1300 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> 1301 CardTableModRefBS::card_shift); 1302 } 1303 1304 // It takes a region that's not empty (i.e., it has at least one 1305 // live object in it and sets its corresponding bit on the region 1306 // bitmap to 1. If the region is "starts humongous" it will also set 1307 // to 1 the bits on the region bitmap that correspond to its 1308 // associated "continues humongous" regions. 1309 void set_bit_for_region(HeapRegion* hr) { 1310 assert(!hr->continuesHumongous(), "should have filtered those out"); 1311 1312 size_t index = hr->hrs_index(); 1313 if (!hr->startsHumongous()) { 1314 // Normal (non-humongous) case: just set the bit. 1315 _region_bm->par_at_put((BitMap::idx_t) index, true); 1316 } else { 1317 // Starts humongous case: calculate how many regions are part of 1318 // this humongous region and then set the bit range. It might 1319 // have been a bit more efficient to look at the object that 1320 // spans these humongous regions to calculate their number from 1321 // the object's size. However, it's a good idea to calculate 1322 // this based on the metadata itself, and not the region 1323 // contents, so that this code is not aware of what goes into 1324 // the humongous regions (in case this changes in the future). 
1325 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1326 size_t end_index = index + 1; 1327 while (end_index < g1h->n_regions()) { 1328 HeapRegion* chr = g1h->region_at(end_index); 1329 if (!chr->continuesHumongous()) { 1330 break; 1331 } 1332 end_index += 1; 1333 } 1334 _region_bm->par_at_put_range((BitMap::idx_t) index, 1335 (BitMap::idx_t) end_index, true); 1336 } 1337 } 1338 1339 bool doHeapRegion(HeapRegion* hr) { 1340 if (!_final && _regions_done == 0) 1341 _start_vtime_sec = os::elapsedVTime(); 1342 1343 if (hr->continuesHumongous()) { 1344 // We will ignore these here and process them when their 1345 // associated "starts humongous" region is processed (see 1346 // set_bit_for_heap_region()). Note that we cannot rely on their 1347 // associated "starts humongous" region to have their bit set to 1348 // 1 since, due to the region chunking in the parallel region 1349 // iteration, a "continues humongous" region might be visited 1350 // before its associated "starts humongous". 1351 return false; 1352 } 1353 1354 HeapWord* nextTop = hr->next_top_at_mark_start(); 1355 HeapWord* start = hr->top_at_conc_mark_count(); 1356 assert(hr->bottom() <= start && start <= hr->end() && 1357 hr->bottom() <= nextTop && nextTop <= hr->end() && 1358 start <= nextTop, 1359 "Preconditions."); 1360 // Otherwise, record the number of word's we'll examine. 1361 size_t words_done = (nextTop - start); 1362 // Find the first marked object at or after "start". 1363 start = _bm->getNextMarkedWordAddress(start, nextTop); 1364 size_t marked_bytes = 0; 1365 1366 // Below, the term "card num" means the result of shifting an address 1367 // by the card shift -- address 0 corresponds to card number 0. One 1368 // must subtract the card num of the bottom of the heap to obtain a 1369 // card table index. 1370 // The first card num of the sequence of live cards currently being 1371 // constructed. -1 ==> no sequence. 
1372 intptr_t start_card_num = -1; 1373 // The last card num of the sequence of live cards currently being 1374 // constructed. -1 ==> no sequence. 1375 intptr_t last_card_num = -1; 1376 1377 while (start < nextTop) { 1378 if (_yield && _cm->do_yield_check()) { 1379 // We yielded. It might be for a full collection, in which case 1380 // all bets are off; terminate the traversal. 1381 if (_cm->has_aborted()) { 1382 _changed = false; 1383 return true; 1384 } else { 1385 // Otherwise, it might be a collection pause, and the region 1386 // we're looking at might be in the collection set. We'll 1387 // abandon this region. 1388 return false; 1389 } 1390 } 1391 oop obj = oop(start); 1392 int obj_sz = obj->size(); 1393 // The card num of the start of the current object. 1394 intptr_t obj_card_num = 1395 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); 1396 1397 HeapWord* obj_last = start + obj_sz - 1; 1398 intptr_t obj_last_card_num = 1399 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift); 1400 1401 if (obj_card_num != last_card_num) { 1402 if (start_card_num == -1) { 1403 assert(last_card_num == -1, "Both or neither."); 1404 start_card_num = obj_card_num; 1405 } else { 1406 assert(last_card_num != -1, "Both or neither."); 1407 assert(obj_card_num >= last_card_num, "Inv"); 1408 if ((obj_card_num - last_card_num) > 1) { 1409 // Mark the last run, and start a new one. 1410 mark_card_num_range(start_card_num, last_card_num); 1411 start_card_num = obj_card_num; 1412 } 1413 } 1414 #if CARD_BM_TEST_MODE 1415 /* 1416 gclog_or_tty->print_cr("Setting bits from %d/%d.", 1417 obj_card_num - _bottom_card_num, 1418 obj_last_card_num - _bottom_card_num); 1419 */ 1420 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) { 1421 _card_bm->par_at_put(j - _bottom_card_num, 1); 1422 } 1423 #endif 1424 } 1425 // In any case, we set the last card num. 
1426 last_card_num = obj_last_card_num; 1427 1428 marked_bytes += (size_t)obj_sz * HeapWordSize; 1429 // Find the next marked object after this one. 1430 start = _bm->getNextMarkedWordAddress(start + 1, nextTop); 1431 _changed = true; 1432 } 1433 // Handle the last range, if any. 1434 if (start_card_num != -1) 1435 mark_card_num_range(start_card_num, last_card_num); 1436 if (_final) { 1437 // Mark the allocated-since-marking portion... 1438 HeapWord* tp = hr->top(); 1439 if (nextTop < tp) { 1440 start_card_num = 1441 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift); 1442 last_card_num = 1443 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift); 1444 mark_card_num_range(start_card_num, last_card_num); 1445 // This definitely means the region has live objects. 1446 set_bit_for_region(hr); 1447 } 1448 } 1449 1450 hr->add_to_marked_bytes(marked_bytes); 1451 // Update the live region bitmap. 1452 if (marked_bytes > 0) { 1453 set_bit_for_region(hr); 1454 } 1455 hr->set_top_at_conc_mark_count(nextTop); 1456 _tot_live += hr->next_live_bytes(); 1457 _tot_used += hr->used(); 1458 _words_done = words_done; 1459 1460 if (!_final) { 1461 ++_regions_done; 1462 if (_regions_done % 10 == 0) { 1463 double end_vtime_sec = os::elapsedVTime(); 1464 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec; 1465 if (elapsed_vtime_sec > (10.0 / 1000.0)) { 1466 jlong sleep_time_ms = 1467 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0); 1468 os::sleep(Thread::current(), sleep_time_ms, false); 1469 _start_vtime_sec = end_vtime_sec; 1470 } 1471 } 1472 } 1473 1474 return false; 1475 } 1476 1477 bool changed() { return _changed; } 1478 void reset() { _changed = false; _words_done = 0; } 1479 void no_yield() { _yield = false; } 1480 size_t words_done() { return _words_done; } 1481 size_t tot_live() { return _tot_live; } 1482 size_t tot_used() { return _tot_used; } 1483 }; 1484 1485 1486 void ConcurrentMark::calcDesiredRegions() { 1487 _region_bm.clear(); 
1488 _card_bm.clear(); 1489 CalcLiveObjectsClosure calccl(false /*final*/, 1490 nextMarkBitMap(), this, 1491 &_region_bm, &_card_bm); 1492 G1CollectedHeap *g1h = G1CollectedHeap::heap(); 1493 g1h->heap_region_iterate(&calccl); 1494 1495 do { 1496 calccl.reset(); 1497 g1h->heap_region_iterate(&calccl); 1498 } while (calccl.changed()); 1499 } 1500 1501 class G1ParFinalCountTask: public AbstractGangTask { 1502 protected: 1503 G1CollectedHeap* _g1h; 1504 CMBitMap* _bm; 1505 size_t _n_workers; 1506 size_t *_live_bytes; 1507 size_t *_used_bytes; 1508 BitMap* _region_bm; 1509 BitMap* _card_bm; 1510 public: 1511 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm, 1512 BitMap* region_bm, BitMap* card_bm) : 1513 AbstractGangTask("G1 final counting"), _g1h(g1h), 1514 _bm(bm), _region_bm(region_bm), _card_bm(card_bm) 1515 { 1516 if (ParallelGCThreads > 0) 1517 _n_workers = _g1h->workers()->total_workers(); 1518 else 1519 _n_workers = 1; 1520 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); 1521 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); 1522 } 1523 1524 ~G1ParFinalCountTask() { 1525 FREE_C_HEAP_ARRAY(size_t, _live_bytes); 1526 FREE_C_HEAP_ARRAY(size_t, _used_bytes); 1527 } 1528 1529 void work(int i) { 1530 CalcLiveObjectsClosure calccl(true /*final*/, 1531 _bm, _g1h->concurrent_mark(), 1532 _region_bm, _card_bm); 1533 calccl.no_yield(); 1534 if (G1CollectedHeap::use_parallel_gc_threads()) { 1535 _g1h->heap_region_par_iterate_chunked(&calccl, i, 1536 HeapRegion::FinalCountClaimValue); 1537 } else { 1538 _g1h->heap_region_iterate(&calccl); 1539 } 1540 assert(calccl.complete(), "Shouldn't have yielded!"); 1541 1542 assert((size_t) i < _n_workers, "invariant"); 1543 _live_bytes[i] = calccl.tot_live(); 1544 _used_bytes[i] = calccl.tot_used(); 1545 } 1546 size_t live_bytes() { 1547 size_t live_bytes = 0; 1548 for (size_t i = 0; i < _n_workers; ++i) 1549 live_bytes += _live_bytes[i]; 1550 return live_bytes; 1551 } 1552 size_t used_bytes() { 1553 size_t used_bytes = 0; 
1554 for (size_t i = 0; i < _n_workers; ++i) 1555 used_bytes += _used_bytes[i]; 1556 return used_bytes; 1557 } 1558 }; 1559 1560 class G1ParNoteEndTask; 1561 1562 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1563 G1CollectedHeap* _g1; 1564 int _worker_num; 1565 size_t _max_live_bytes; 1566 size_t _regions_claimed; 1567 size_t _freed_bytes; 1568 FreeRegionList* _local_cleanup_list; 1569 HumongousRegionSet* _humongous_proxy_set; 1570 HRRSCleanupTask* _hrrs_cleanup_task; 1571 double _claimed_region_time; 1572 double _max_region_time; 1573 1574 public: 1575 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1576 int worker_num, 1577 FreeRegionList* local_cleanup_list, 1578 HumongousRegionSet* humongous_proxy_set, 1579 HRRSCleanupTask* hrrs_cleanup_task); 1580 size_t freed_bytes() { return _freed_bytes; } 1581 1582 bool doHeapRegion(HeapRegion *r); 1583 1584 size_t max_live_bytes() { return _max_live_bytes; } 1585 size_t regions_claimed() { return _regions_claimed; } 1586 double claimed_region_time_sec() { return _claimed_region_time; } 1587 double max_region_time_sec() { return _max_region_time; } 1588 }; 1589 1590 class G1ParNoteEndTask: public AbstractGangTask { 1591 friend class G1NoteEndOfConcMarkClosure; 1592 1593 protected: 1594 G1CollectedHeap* _g1h; 1595 size_t _max_live_bytes; 1596 size_t _freed_bytes; 1597 FreeRegionList* _cleanup_list; 1598 1599 public: 1600 G1ParNoteEndTask(G1CollectedHeap* g1h, 1601 FreeRegionList* cleanup_list) : 1602 AbstractGangTask("G1 note end"), _g1h(g1h), 1603 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1604 1605 void work(int i) { 1606 double start = os::elapsedTime(); 1607 FreeRegionList local_cleanup_list("Local Cleanup List"); 1608 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1609 HRRSCleanupTask hrrs_cleanup_task; 1610 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list, 1611 &humongous_proxy_set, 1612 &hrrs_cleanup_task); 1613 if 
(G1CollectedHeap::use_parallel_gc_threads()) { 1614 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, 1615 HeapRegion::NoteEndClaimValue); 1616 } else { 1617 _g1h->heap_region_iterate(&g1_note_end); 1618 } 1619 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1620 1621 // Now update the lists 1622 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1623 NULL /* free_list */, 1624 &humongous_proxy_set, 1625 true /* par */); 1626 { 1627 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1628 _max_live_bytes += g1_note_end.max_live_bytes(); 1629 _freed_bytes += g1_note_end.freed_bytes(); 1630 1631 _cleanup_list->add_as_tail(&local_cleanup_list); 1632 assert(local_cleanup_list.is_empty(), "post-condition"); 1633 1634 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1635 } 1636 double end = os::elapsedTime(); 1637 if (G1PrintParCleanupStats) { 1638 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] " 1639 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n", 1640 i, start, end, (end-start)*1000.0, 1641 g1_note_end.regions_claimed(), 1642 g1_note_end.claimed_region_time_sec()*1000.0, 1643 g1_note_end.max_region_time_sec()*1000.0); 1644 } 1645 } 1646 size_t max_live_bytes() { return _max_live_bytes; } 1647 size_t freed_bytes() { return _freed_bytes; } 1648 }; 1649 1650 class G1ParScrubRemSetTask: public AbstractGangTask { 1651 protected: 1652 G1RemSet* _g1rs; 1653 BitMap* _region_bm; 1654 BitMap* _card_bm; 1655 public: 1656 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1657 BitMap* region_bm, BitMap* card_bm) : 1658 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1659 _region_bm(region_bm), _card_bm(card_bm) 1660 {} 1661 1662 void work(int i) { 1663 if (G1CollectedHeap::use_parallel_gc_threads()) { 1664 _g1rs->scrub_par(_region_bm, _card_bm, i, 1665 HeapRegion::ScrubRemSetClaimValue); 1666 } else { 1667 _g1rs->scrub(_region_bm, _card_bm); 1668 } 1669 } 1670 1671 }; 1672 1673 
G1NoteEndOfConcMarkClosure:: 1674 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1675 int worker_num, 1676 FreeRegionList* local_cleanup_list, 1677 HumongousRegionSet* humongous_proxy_set, 1678 HRRSCleanupTask* hrrs_cleanup_task) 1679 : _g1(g1), _worker_num(worker_num), 1680 _max_live_bytes(0), _regions_claimed(0), 1681 _freed_bytes(0), 1682 _claimed_region_time(0.0), _max_region_time(0.0), 1683 _local_cleanup_list(local_cleanup_list), 1684 _humongous_proxy_set(humongous_proxy_set), 1685 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1686 1687 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) { 1688 // We use a claim value of zero here because all regions 1689 // were claimed with value 1 in the FinalCount task. 1690 hr->reset_gc_time_stamp(); 1691 if (!hr->continuesHumongous()) { 1692 double start = os::elapsedTime(); 1693 _regions_claimed++; 1694 hr->note_end_of_marking(); 1695 _max_live_bytes += hr->max_live_bytes(); 1696 _g1->free_region_if_empty(hr, 1697 &_freed_bytes, 1698 _local_cleanup_list, 1699 _humongous_proxy_set, 1700 _hrrs_cleanup_task, 1701 true /* par */); 1702 double region_time = (os::elapsedTime() - start); 1703 _claimed_region_time += region_time; 1704 if (region_time > _max_region_time) _max_region_time = region_time; 1705 } 1706 return false; 1707 } 1708 1709 void ConcurrentMark::cleanup() { 1710 // world is stopped at this checkpoint 1711 assert(SafepointSynchronize::is_at_safepoint(), 1712 "world should be stopped"); 1713 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1714 1715 // If a full collection has happened, we shouldn't do this. 
1716 if (has_aborted()) { 1717 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1718 return; 1719 } 1720 1721 g1h->verify_region_sets_optional(); 1722 1723 if (VerifyDuringGC) { 1724 HandleMark hm; // handle scope 1725 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1726 Universe::heap()->prepare_for_verify(); 1727 Universe::verify(/* allow dirty */ true, 1728 /* silent */ false, 1729 /* prev marking */ true); 1730 } 1731 1732 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1733 g1p->record_concurrent_mark_cleanup_start(); 1734 1735 double start = os::elapsedTime(); 1736 1737 HeapRegionRemSet::reset_for_cleanup_tasks(); 1738 1739 // Do counting once more with the world stopped for good measure. 1740 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), 1741 &_region_bm, &_card_bm); 1742 if (G1CollectedHeap::use_parallel_gc_threads()) { 1743 assert(g1h->check_heap_region_claim_values( 1744 HeapRegion::InitialClaimValue), 1745 "sanity check"); 1746 1747 int n_workers = g1h->workers()->total_workers(); 1748 g1h->set_par_threads(n_workers); 1749 g1h->workers()->run_task(&g1_par_count_task); 1750 g1h->set_par_threads(0); 1751 1752 assert(g1h->check_heap_region_claim_values( 1753 HeapRegion::FinalCountClaimValue), 1754 "sanity check"); 1755 } else { 1756 g1_par_count_task.work(0); 1757 } 1758 1759 size_t known_garbage_bytes = 1760 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); 1761 #if 0 1762 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", 1763 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), 1764 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), 1765 (double) known_garbage_bytes / (double) (1024 * 1024)); 1766 #endif // 0 1767 g1p->set_known_garbage_bytes(known_garbage_bytes); 1768 1769 size_t start_used_bytes = g1h->used(); 1770 _at_least_one_mark_complete = true; 1771 g1h->set_marking_complete(); 1772 1773 double count_end = os::elapsedTime(); 1774 double 
this_final_counting_time = (count_end - start); 1775 if (G1PrintParCleanupStats) { 1776 gclog_or_tty->print_cr("Cleanup:"); 1777 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", 1778 this_final_counting_time*1000.0); 1779 } 1780 _total_counting_time += this_final_counting_time; 1781 1782 if (G1PrintRegionLivenessInfo) { 1783 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1784 _g1h->heap_region_iterate(&cl); 1785 } 1786 1787 // Install newly created mark bitMap as "prev". 1788 swapMarkBitMaps(); 1789 1790 g1h->reset_gc_time_stamp(); 1791 1792 // Note end of marking in all heap regions. 1793 double note_end_start = os::elapsedTime(); 1794 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 1795 if (G1CollectedHeap::use_parallel_gc_threads()) { 1796 int n_workers = g1h->workers()->total_workers(); 1797 g1h->set_par_threads(n_workers); 1798 g1h->workers()->run_task(&g1_par_note_end_task); 1799 g1h->set_par_threads(0); 1800 1801 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 1802 "sanity check"); 1803 } else { 1804 g1_par_note_end_task.work(0); 1805 } 1806 1807 if (!cleanup_list_is_empty()) { 1808 // The cleanup list is not empty, so we'll have to process it 1809 // concurrently. Notify anyone else that might be wanting free 1810 // regions that there will be more free regions coming soon. 1811 g1h->set_free_regions_coming(); 1812 } 1813 double note_end_end = os::elapsedTime(); 1814 if (G1PrintParCleanupStats) { 1815 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.", 1816 (note_end_end - note_end_start)*1000.0); 1817 } 1818 1819 1820 // call below, since it affects the metric by which we sort the heap 1821 // regions. 
1822 if (G1ScrubRemSets) { 1823 double rs_scrub_start = os::elapsedTime(); 1824 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 1825 if (G1CollectedHeap::use_parallel_gc_threads()) { 1826 int n_workers = g1h->workers()->total_workers(); 1827 g1h->set_par_threads(n_workers); 1828 g1h->workers()->run_task(&g1_par_scrub_rs_task); 1829 g1h->set_par_threads(0); 1830 1831 assert(g1h->check_heap_region_claim_values( 1832 HeapRegion::ScrubRemSetClaimValue), 1833 "sanity check"); 1834 } else { 1835 g1_par_scrub_rs_task.work(0); 1836 } 1837 1838 double rs_scrub_end = os::elapsedTime(); 1839 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 1840 _total_rs_scrub_time += this_rs_scrub_time; 1841 } 1842 1843 // this will also free any regions totally full of garbage objects, 1844 // and sort the regions. 1845 g1h->g1_policy()->record_concurrent_mark_cleanup_end( 1846 g1_par_note_end_task.freed_bytes(), 1847 g1_par_note_end_task.max_live_bytes()); 1848 1849 // Statistics. 1850 double end = os::elapsedTime(); 1851 _cleanup_times.add((end - start) * 1000.0); 1852 1853 // G1CollectedHeap::heap()->print(); 1854 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d", 1855 // G1CollectedHeap::heap()->get_gc_time_stamp()); 1856 1857 if (PrintGC || PrintGCDetails) { 1858 g1h->print_size_transition(gclog_or_tty, 1859 start_used_bytes, 1860 g1h->used(), 1861 g1h->capacity()); 1862 } 1863 1864 size_t cleaned_up_bytes = start_used_bytes - g1h->used(); 1865 g1p->decrease_known_garbage_bytes(cleaned_up_bytes); 1866 1867 // We need to make this be a "collection" so any collection pause that 1868 // races with it goes around and waits for completeCleanup to finish. 
1869 g1h->increment_total_collections(); 1870 1871 if (VerifyDuringGC) { 1872 HandleMark hm; // handle scope 1873 gclog_or_tty->print(" VerifyDuringGC:(after)"); 1874 Universe::heap()->prepare_for_verify(); 1875 Universe::verify(/* allow dirty */ true, 1876 /* silent */ false, 1877 /* prev marking */ true); 1878 } 1879 1880 g1h->verify_region_sets_optional(); 1881 } 1882 1883 void ConcurrentMark::completeCleanup() { 1884 if (has_aborted()) return; 1885 1886 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1887 1888 _cleanup_list.verify_optional(); 1889 FreeRegionList tmp_free_list("Tmp Free List"); 1890 1891 if (G1ConcRegionFreeingVerbose) { 1892 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 1893 "cleanup list has "SIZE_FORMAT" entries", 1894 _cleanup_list.length()); 1895 } 1896 1897 // Noone else should be accessing the _cleanup_list at this point, 1898 // so it's not necessary to take any locks 1899 while (!_cleanup_list.is_empty()) { 1900 HeapRegion* hr = _cleanup_list.remove_head(); 1901 assert(hr != NULL, "the list was not empty"); 1902 hr->par_clear(); 1903 tmp_free_list.add_as_tail(hr); 1904 1905 // Instead of adding one region at a time to the secondary_free_list, 1906 // we accumulate them in the local list and move them a few at a 1907 // time. This also cuts down on the number of notify_all() calls 1908 // we do during this process. We'll also append the local list when 1909 // _cleanup_list is empty (which means we just removed the last 1910 // region from the _cleanup_list). 
1911 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 1912 _cleanup_list.is_empty()) { 1913 if (G1ConcRegionFreeingVerbose) { 1914 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 1915 "appending "SIZE_FORMAT" entries to the " 1916 "secondary_free_list, clean list still has " 1917 SIZE_FORMAT" entries", 1918 tmp_free_list.length(), 1919 _cleanup_list.length()); 1920 } 1921 1922 { 1923 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 1924 g1h->secondary_free_list_add_as_tail(&tmp_free_list); 1925 SecondaryFreeList_lock->notify_all(); 1926 } 1927 1928 if (G1StressConcRegionFreeing) { 1929 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 1930 os::sleep(Thread::current(), (jlong) 1, false); 1931 } 1932 } 1933 } 1934 } 1935 assert(tmp_free_list.is_empty(), "post-condition"); 1936 } 1937 1938 // Support closures for reference procssing in G1 1939 1940 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1941 HeapWord* addr = (HeapWord*)obj; 1942 return addr != NULL && 1943 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 1944 } 1945 1946 class G1CMKeepAliveClosure: public OopClosure { 1947 G1CollectedHeap* _g1; 1948 ConcurrentMark* _cm; 1949 CMBitMap* _bitMap; 1950 public: 1951 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm, 1952 CMBitMap* bitMap) : 1953 _g1(g1), _cm(cm), 1954 _bitMap(bitMap) {} 1955 1956 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1957 virtual void do_oop( oop* p) { do_oop_work(p); } 1958 1959 template <class T> void do_oop_work(T* p) { 1960 oop obj = oopDesc::load_decode_heap_oop(p); 1961 HeapWord* addr = (HeapWord*)obj; 1962 1963 if (_cm->verbose_high()) 1964 gclog_or_tty->print_cr("\t[0] we're looking at location " 1965 "*"PTR_FORMAT" = "PTR_FORMAT, 1966 p, (void*) obj); 1967 1968 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) { 1969 _bitMap->mark(addr); 1970 _cm->mark_stack_push(obj); 1971 } 1972 } 1973 }; 1974 1975 class 
G1CMDrainMarkingStackClosure: public VoidClosure { 1976 CMMarkStack* _markStack; 1977 CMBitMap* _bitMap; 1978 G1CMKeepAliveClosure* _oopClosure; 1979 public: 1980 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack, 1981 G1CMKeepAliveClosure* oopClosure) : 1982 _bitMap(bitMap), 1983 _markStack(markStack), 1984 _oopClosure(oopClosure) 1985 {} 1986 1987 void do_void() { 1988 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false); 1989 } 1990 }; 1991 1992 // 'Keep Alive' closure used by parallel reference processing. 1993 // An instance of this closure is used in the parallel reference processing 1994 // code rather than an instance of G1CMKeepAliveClosure. We could have used 1995 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are 1996 // placed on to discovered ref lists once so we can mark and push with no 1997 // need to check whether the object has already been marked. Using the 1998 // G1CMKeepAliveClosure would mean, however, having all the worker threads 1999 // operating on the global mark stack. This means that an individual 2000 // worker would be doing lock-free pushes while it processes its own 2001 // discovered ref list followed by drain call. If the discovered ref lists 2002 // are unbalanced then this could cause interference with the other 2003 // workers. Using a CMTask (and its embedded local data structures) 2004 // avoids that potential interference. 
class G1CMParKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark*  _cm;
  CMTask*          _task;
  CMBitMap*        _bitMap;
  int              _ref_counter_limit;  // how many refs between draining steps
  int              _ref_counter;        // countdown to the next draining step

 public:
  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
                                  CMTask* task,
                                  CMBitMap* bitMap) :
    _cm(cm), _task(task), _bitMap(bitMap),
    _ref_counter_limit(G1RefProcDrainInterval)
  {
    assert(_ref_counter_limit > 0, "sanity");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  // Marks/pushes the referenced object via this worker's CMTask and,
  // every _ref_counter_limit references, drains the task's local data
  // structures so they do not grow without bound.
  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high())
        gclog_or_tty->print_cr("\t[%d] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->task_id(), p, (void*) obj);

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them and objects
        // reachable from them on to the local stack (and possibly the global stack).
        // Call do_marking_step() to process these entries. We call the routine in a
        // loop, which we'll exit if there's nothing more to do (i.e. we're done
        // with the entries that we've pushed as a result of the deal_with_reference
        // calls above) or we overflow.
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
        // while there may still be some work to do. (See the comment at the
        // beginning of CMTask::do_marking_step() for those conditions - one of which
        // is reaching the specified time target.) It is only when
        // CMTask::do_marking_step() returns without setting the has_aborted() flag
        // that the marking has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_stealing    */,
                                 false /* do_termination */);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high())
        gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
    }
  }
};

// 'Drain' closure used by parallel reference processing. Completely
// drains this worker's local and the global marking stacks via the
// worker's CMTask, with stealing and termination enabled.
class G1CMParDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
 public:
  G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task)
  {}

  void do_void() {
    do {
      if (_cm->verbose_high())
        gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step", _task->task_id());

      // We call CMTask::do_marking_step() to completely drain the local and
      // global marking stacks. The routine is called in a loop, which we'll
      // exit if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the
      // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
      // lists above) or we overflow the global marking stack.
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
      // while there may still be some work to do. (See the comment at the
      // beginning of CMTask::do_marking_step() for those conditions - one of which
      // is reaching the specified time target.) It is only when
      // CMTask::do_marking_step() returns without setting the has_aborted() flag
      // that the marking has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_stealing    */,
                             true /* do_termination */);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for G1
class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitmap;
  WorkGang*        _workers;
  int              _active_workers;

public:
  G1RefProcTaskExecutor(G1CollectedHeap* g1h,
                        ConcurrentMark* cm,
                        CMBitMap* bitmap,
                        WorkGang* workers,
                        int n_workers) :
    _g1h(g1h), _cm(cm), _bitmap(bitmap),
    _workers(workers), _active_workers(n_workers)
  { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

// Gang task that runs a reference-processing ProcessTask on each
// worker, wiring in the per-worker keep-alive and drain closures above.
class G1RefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&     _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitmap;

public:
  G1RefProcTaskProxy(ProcessTask& proc_task,
                     G1CollectedHeap* g1h,
                     ConcurrentMark* cm,
                     CMBitMap* bitmap) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
  {}

  virtual void work(int i) {
    // Each worker uses its own CMTask so that discovered-ref processing
    // does not contend on the global mark stack.
    CMTask* marking_task = _cm->task(i);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
    G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);

    _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);

  // We need to reset the phase for each task execution so that
  // the termination protocol of CMTask::do_marking_step works.
  _cm->set_phase(_active_workers, false /* concurrent */);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

// Gang task that runs a reference-processing EnqueueTask on each worker.
class G1RefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task)
  { }

  virtual void work(int i) {
    _enq_task.work(i);
  }
};

void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1RefEnqueueTaskProxy enq_task_proxy(enq_task);

  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

// Processes and enqueues the references discovered during concurrent
// marking. Runs at a safepoint as part of the remark pause.
void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ReferenceProcessor* rp = g1h->ref_processor();

  // See the comment in G1CollectedHeap::ref_processing_init()
  // about how reference processing currently works in G1.

  // Process weak references.
  rp->setup_policy(clear_all_soft_refs);
  assert(_markStack.isEmpty(), "mark stack should be empty");

  G1CMIsAliveClosure   g1_is_alive(g1h);
  G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
  G1CMDrainMarkingStackClosure
    g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
  // We use the work gang from the G1CollectedHeap and we utilize all
  // the worker threads.
  int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
  active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);

  G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
                                          g1h->workers(), active_workers);

  if (rp->processing_is_mt()) {
    // Set the degree of MT here. If the discovery is done MT, there
    // may have been a different number of threads doing the discovery
    // and a different number of discovered lists may have Ref objects.
    // That is OK as long as the Reference lists are balanced (see
    // balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);

    rp->process_discovered_references(&g1_is_alive,
                                      &g1_keep_alive,
                                      &g1_drain_mark_stack,
                                      &par_task_executor);

    // The work routines of the parallel keep_alive and drain_marking_stack
    // will set the has_overflown flag if we overflow the global marking
    // stack.
  } else {
    rp->process_discovered_references(&g1_is_alive,
                                      &g1_keep_alive,
                                      &g1_drain_mark_stack,
                                      NULL);
  }

  assert(_markStack.overflow() || _markStack.isEmpty(),
         "mark stack should be empty (unless it overflowed)");
  if (_markStack.overflow()) {
    // Should have been done already when we tried to push an
    // entry on to the global mark stack. But let's do it again.
    set_has_overflown();
  }

  if (rp->processing_is_mt()) {
    assert(rp->num_q() == active_workers, "why not");
    rp->enqueue_discovered_references(&par_task_executor);
  } else {
    rp->enqueue_discovered_references();
  }

  rp->verify_no_references_recorded();
  assert(!rp->discovery_enabled(), "should have been disabled");

  // Now clean up stale oops in StringTable
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in symbol table.
  SymbolTable::unlink();
}

// Swaps the roles of the prev and next bitmaps at the end of a marking
// cycle: the just-completed next bitmap becomes the new prev bitmap.
void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

// Gang task used by the remark pause to finish marking with all
// available workers (stealing and termination enabled).
class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark *_cm;

public:
  void work(int worker_i) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be actived.
    if ((size_t)worker_i < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_i);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_stealing    */,
                              true /* do_termination */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm) :
    AbstractGangTask("Par Remark"), _cm(cm) { }
};

// Finishes the marking work during the remark pause: drains remaining
// SATB buffers and marking stacks using all available worker threads.
void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = ParallelGCThreads;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = 1;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    remarkTask.work(0);
  }
  // All SATB buffers must have been processed by now.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}

#ifndef PRODUCT

// Debug-only closure: prints, for each oop location in an object, the
// referenced address annotated with its marking/TAMS status.
class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  CMBitMapRO*      _bitmap;
  outputStream*    _out;
  bool             _use_prev_marking;
  bool             _all;

public:
  PrintReachableOopClosure(CMBitMapRO* bitmap,
                           outputStream* out,
                           bool use_prev_marking,
                           bool all) :
    _g1h(G1CollectedHeap::heap()),
    _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    // Annotations: "" = NULL, " O" = outside G1, " >" = above TAMS,
    // " M" = marked, " NOT" = below TAMS and unmarked.
    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = false;
      if (_use_prev_marking) {
        over_tams = hr->obj_allocated_since_prev_marking(obj);
      } else {
        over_tams = hr->obj_allocated_since_next_marking(obj);
      }
      bool marked = _bitmap->isMarked((HeapWord*) obj);

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

// Debug-only closure: prints an object (if selected by _all / TAMS /
// mark status) and then all of its outgoing references.
class PrintReachableObjectClosure : public ObjectClosure {
private:
  CMBitMapRO*   _bitmap;
  outputStream* _out;
  bool          _use_prev_marking;
  bool          _all;
  HeapRegion*   _hr;

public:
  PrintReachableObjectClosure(CMBitMapRO* bitmap,
                              outputStream* out,
                              bool use_prev_marking,
                              bool all,
                              HeapRegion* hr) :
    _bitmap(bitmap), _out(out),
    _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams;
    if (_use_prev_marking) {
      over_tams = _hr->obj_allocated_since_prev_marking(o);
    } else {
      over_tams = _hr->obj_allocated_since_next_marking(o);
    }
    bool marked = _bitmap->isMarked((HeapWord*) o);
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ? " M" : "");
      PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
      o->oop_iterate(&oopCl);
    }
  }
};

// Debug-only closure: prints a heap region header and then every
// (selected) object allocated in it.
class PrintReachableRegionClosure : public HeapRegionClosure {
private:
  CMBitMapRO*   _bitmap;
  outputStream* _out;
  bool          _use_prev_marking;
  bool          _all;

public:
  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* b = hr->bottom();
    HeapWord* e = hr->end();
    HeapWord* t = hr->top();
    HeapWord* p = NULL;
    if (_use_prev_marking) {
      p = hr->prev_top_at_mark_start();
    } else {
      p = hr->next_top_at_mark_start();
    }
    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
                   "TAMS: "PTR_FORMAT, b, e, t, p);
    _out->cr();

    HeapWord* from = b;
    HeapWord* to   = t;

    if (to > from) {
      _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
      _out->cr();
      PrintReachableObjectClosure ocl(_bitmap, _out,
                                      _use_prev_marking, _all, hr);
      hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
      _out->cr();
    }

    return false;
  }

  PrintReachableRegionClosure(CMBitMapRO* bitmap,
                              outputStream* out,
                              bool use_prev_marking,
                              bool all) :
    _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
};

// Debug-only heap dump: writes the marking/TAMS status of every object
// (or only interesting ones, if !all) to <G1PrintReachableBaseFile>.<str>.
void ConcurrentMark::print_reachable(const char* str,
                                     bool use_prev_marking,
                                     bool all) {
  gclog_or_tty->cr();
  gclog_or_tty->print_cr("== Doing heap dump... ");

  if (G1PrintReachableBaseFile == NULL) {
    gclog_or_tty->print_cr("  #### error: no base file defined");
    return;
  }

  if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
      (JVM_MAXPATHLEN - 1)) {
    gclog_or_tty->print_cr("  #### error: file name too long");
    return;
  }

  char file_name[JVM_MAXPATHLEN];
  // sprintf is safe here: the length check above bounds the result.
  sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  gclog_or_tty->print_cr("  dumping to file %s", file_name);

  fileStream fout(file_name);
  if (!fout.is_open()) {
    gclog_or_tty->print_cr("  #### error: could not open file");
    return;
  }

  outputStream* out = &fout;

  CMBitMapRO* bitmap = NULL;
  if (use_prev_marking) {
    bitmap = _prevMarkBitMap;
  } else {
    bitmap = _nextMarkBitMap;
  }

  out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
  out->cr();

  out->print_cr("--- ITERATING OVER REGIONS");
  out->cr();
  PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
  _g1h->heap_region_iterate(&rcl);
  out->cr();

  gclog_or_tty->print_cr(" done");
  gclog_or_tty->flush();
}

#endif // PRODUCT

// This note is for drainAllSATBBuffers and the code in between.
// In the future we could reuse a task to do this work during an
// evacuation pause (since now tasks are not active and can be claimed
// during an evacuation pause). This was a late change to the code and
// is currently not being taken advantage of.
2535 2536 class CMGlobalObjectClosure : public ObjectClosure { 2537 private: 2538 ConcurrentMark* _cm; 2539 2540 public: 2541 void do_object(oop obj) { 2542 _cm->deal_with_reference(obj); 2543 } 2544 2545 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } 2546 }; 2547 2548 void ConcurrentMark::deal_with_reference(oop obj) { 2549 if (verbose_high()) 2550 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, 2551 (void*) obj); 2552 2553 2554 HeapWord* objAddr = (HeapWord*) obj; 2555 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error"); 2556 if (_g1h->is_in_g1_reserved(objAddr)) { 2557 assert(obj != NULL, "is_in_g1_reserved should ensure this"); 2558 HeapRegion* hr = _g1h->heap_region_containing(obj); 2559 if (_g1h->is_obj_ill(obj, hr)) { 2560 if (verbose_high()) 2561 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " 2562 "marked", (void*) obj); 2563 2564 // we need to mark it first 2565 if (_nextMarkBitMap->parMark(objAddr)) { 2566 // No OrderAccess:store_load() is needed. 
It is implicit in the 2567 // CAS done in parMark(objAddr) above 2568 HeapWord* finger = _finger; 2569 if (objAddr < finger) { 2570 if (verbose_high()) 2571 gclog_or_tty->print_cr("[global] below the global finger " 2572 "("PTR_FORMAT"), pushing it", finger); 2573 if (!mark_stack_push(obj)) { 2574 if (verbose_low()) 2575 gclog_or_tty->print_cr("[global] global stack overflow during " 2576 "deal_with_reference"); 2577 } 2578 } 2579 } 2580 } 2581 } 2582 } 2583 2584 void ConcurrentMark::drainAllSATBBuffers() { 2585 CMGlobalObjectClosure oc(this); 2586 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2587 satb_mq_set.set_closure(&oc); 2588 2589 while (satb_mq_set.apply_closure_to_completed_buffer()) { 2590 if (verbose_medium()) 2591 gclog_or_tty->print_cr("[global] processed an SATB buffer"); 2592 } 2593 2594 // no need to check whether we should do this, as this is only 2595 // called during an evacuation pause 2596 satb_mq_set.iterate_closure_all_threads(); 2597 2598 satb_mq_set.set_closure(NULL); 2599 assert(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2600 } 2601 2602 void ConcurrentMark::markPrev(oop p) { 2603 // Note we are overriding the read-only view of the prev map here, via 2604 // the cast. 2605 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p); 2606 } 2607 2608 void ConcurrentMark::clear(oop p) { 2609 assert(p != NULL && p->is_oop(), "expected an oop"); 2610 HeapWord* addr = (HeapWord*)p; 2611 assert(addr >= _nextMarkBitMap->startWord() || 2612 addr < _nextMarkBitMap->endWord(), "in a region"); 2613 2614 _nextMarkBitMap->clear(addr); 2615 } 2616 2617 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) { 2618 // Note we are overriding the read-only view of the prev map here, via 2619 // the cast. 
2620 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2621 _nextMarkBitMap->clearRange(mr); 2622 } 2623 2624 HeapRegion* 2625 ConcurrentMark::claim_region(int task_num) { 2626 // "checkpoint" the finger 2627 HeapWord* finger = _finger; 2628 2629 // _heap_end will not change underneath our feet; it only changes at 2630 // yield points. 2631 while (finger < _heap_end) { 2632 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2633 2634 // is the gap between reading the finger and doing the CAS too long? 2635 2636 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 2637 HeapWord* bottom = curr_region->bottom(); 2638 HeapWord* end = curr_region->end(); 2639 HeapWord* limit = curr_region->next_top_at_mark_start(); 2640 2641 if (verbose_low()) 2642 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" " 2643 "["PTR_FORMAT", "PTR_FORMAT"), " 2644 "limit = "PTR_FORMAT, 2645 task_num, curr_region, bottom, end, limit); 2646 2647 HeapWord* res = 2648 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2649 if (res == finger) { 2650 // we succeeded 2651 2652 // notice that _finger == end cannot be guaranteed here since, 2653 // someone else might have moved the finger even further 2654 assert(_finger >= end, "the finger should have moved forward"); 2655 2656 if (verbose_low()) 2657 gclog_or_tty->print_cr("[%d] we were successful with region = " 2658 PTR_FORMAT, task_num, curr_region); 2659 2660 if (limit > bottom) { 2661 if (verbose_low()) 2662 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, " 2663 "returning it ", task_num, curr_region); 2664 return curr_region; 2665 } else { 2666 assert(limit == bottom, 2667 "the region limit should be at bottom"); 2668 if (verbose_low()) 2669 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, " 2670 "returning NULL", task_num, curr_region); 2671 // we return NULL and the caller should try calling 2672 // claim_region() again. 
2673 return NULL; 2674 } 2675 } else { 2676 assert(_finger > finger, "the finger should have moved forward"); 2677 if (verbose_low()) 2678 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2679 "global finger = "PTR_FORMAT", " 2680 "our finger = "PTR_FORMAT, 2681 task_num, _finger, finger); 2682 2683 // read it again 2684 finger = _finger; 2685 } 2686 } 2687 2688 return NULL; 2689 } 2690 2691 bool ConcurrentMark::invalidate_aborted_regions_in_cset() { 2692 bool result = false; 2693 for (int i = 0; i < (int)_max_task_num; ++i) { 2694 CMTask* the_task = _tasks[i]; 2695 MemRegion mr = the_task->aborted_region(); 2696 if (mr.start() != NULL) { 2697 assert(mr.end() != NULL, "invariant"); 2698 assert(mr.word_size() > 0, "invariant"); 2699 HeapRegion* hr = _g1h->heap_region_containing(mr.start()); 2700 assert(hr != NULL, "invariant"); 2701 if (hr->in_collection_set()) { 2702 // The region points into the collection set 2703 the_task->set_aborted_region(MemRegion()); 2704 result = true; 2705 } 2706 } 2707 } 2708 return result; 2709 } 2710 2711 bool ConcurrentMark::has_aborted_regions() { 2712 for (int i = 0; i < (int)_max_task_num; ++i) { 2713 CMTask* the_task = _tasks[i]; 2714 MemRegion mr = the_task->aborted_region(); 2715 if (mr.start() != NULL) { 2716 assert(mr.end() != NULL, "invariant"); 2717 assert(mr.word_size() > 0, "invariant"); 2718 return true; 2719 } 2720 } 2721 return false; 2722 } 2723 2724 void ConcurrentMark::oops_do(OopClosure* cl) { 2725 if (_markStack.size() > 0 && verbose_low()) 2726 gclog_or_tty->print_cr("[global] scanning the global marking stack, " 2727 "size = %d", _markStack.size()); 2728 // we first iterate over the contents of the mark stack... 
2729 _markStack.oops_do(cl); 2730 2731 for (int i = 0; i < (int)_max_task_num; ++i) { 2732 OopTaskQueue* queue = _task_queues->queue((int)i); 2733 2734 if (queue->size() > 0 && verbose_low()) 2735 gclog_or_tty->print_cr("[global] scanning task queue of task %d, " 2736 "size = %d", i, queue->size()); 2737 2738 // ...then over the contents of the all the task queues. 2739 queue->oops_do(cl); 2740 } 2741 2742 // Invalidate any entries, that are in the region stack, that 2743 // point into the collection set 2744 if (_regionStack.invalidate_entries_into_cset()) { 2745 // otherwise, any gray objects copied during the evacuation pause 2746 // might not be visited. 2747 assert(_should_gray_objects, "invariant"); 2748 } 2749 2750 // Invalidate any aborted regions, recorded in the individual CM 2751 // tasks, that point into the collection set. 2752 if (invalidate_aborted_regions_in_cset()) { 2753 // otherwise, any gray objects copied during the evacuation pause 2754 // might not be visited. 2755 assert(_should_gray_objects, "invariant"); 2756 } 2757 2758 } 2759 2760 void ConcurrentMark::clear_marking_state(bool clear_overflow) { 2761 _markStack.setEmpty(); 2762 _markStack.clear_overflow(); 2763 _regionStack.setEmpty(); 2764 _regionStack.clear_overflow(); 2765 if (clear_overflow) { 2766 clear_has_overflown(); 2767 } else { 2768 assert(has_overflown(), "pre-condition"); 2769 } 2770 _finger = _heap_start; 2771 2772 for (int i = 0; i < (int)_max_task_num; ++i) { 2773 OopTaskQueue* queue = _task_queues->queue(i); 2774 queue->set_empty(); 2775 // Clear any partial regions from the CMTasks 2776 _tasks[i]->clear_aborted_region(); 2777 } 2778 } 2779 2780 void ConcurrentMark::print_stats() { 2781 if (verbose_stats()) { 2782 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2783 for (size_t i = 0; i < _active_tasks; ++i) { 2784 _tasks[i]->print_stats(); 2785 
gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2786 } 2787 } 2788 } 2789 2790 class CSMarkOopClosure: public OopClosure { 2791 friend class CSMarkBitMapClosure; 2792 2793 G1CollectedHeap* _g1h; 2794 CMBitMap* _bm; 2795 ConcurrentMark* _cm; 2796 oop* _ms; 2797 jint* _array_ind_stack; 2798 int _ms_size; 2799 int _ms_ind; 2800 int _array_increment; 2801 2802 bool push(oop obj, int arr_ind = 0) { 2803 if (_ms_ind == _ms_size) { 2804 gclog_or_tty->print_cr("Mark stack is full."); 2805 return false; 2806 } 2807 _ms[_ms_ind] = obj; 2808 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind; 2809 _ms_ind++; 2810 return true; 2811 } 2812 2813 oop pop() { 2814 if (_ms_ind == 0) return NULL; 2815 else { 2816 _ms_ind--; 2817 return _ms[_ms_ind]; 2818 } 2819 } 2820 2821 template <class T> bool drain() { 2822 while (_ms_ind > 0) { 2823 oop obj = pop(); 2824 assert(obj != NULL, "Since index was non-zero."); 2825 if (obj->is_objArray()) { 2826 jint arr_ind = _array_ind_stack[_ms_ind]; 2827 objArrayOop aobj = objArrayOop(obj); 2828 jint len = aobj->length(); 2829 jint next_arr_ind = arr_ind + _array_increment; 2830 if (next_arr_ind < len) { 2831 push(obj, next_arr_ind); 2832 } 2833 // Now process this portion of this one. 
2834 int lim = MIN2(next_arr_ind, len); 2835 for (int j = arr_ind; j < lim; j++) { 2836 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j)); 2837 } 2838 2839 } else { 2840 obj->oop_iterate(this); 2841 } 2842 if (abort()) return false; 2843 } 2844 return true; 2845 } 2846 2847 public: 2848 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : 2849 _g1h(G1CollectedHeap::heap()), 2850 _cm(cm), 2851 _bm(cm->nextMarkBitMap()), 2852 _ms_size(ms_size), _ms_ind(0), 2853 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), 2854 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), 2855 _array_increment(MAX2(ms_size/8, 16)) 2856 {} 2857 2858 ~CSMarkOopClosure() { 2859 FREE_C_HEAP_ARRAY(oop, _ms); 2860 FREE_C_HEAP_ARRAY(jint, _array_ind_stack); 2861 } 2862 2863 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2864 virtual void do_oop( oop* p) { do_oop_work(p); } 2865 2866 template <class T> void do_oop_work(T* p) { 2867 T heap_oop = oopDesc::load_heap_oop(p); 2868 if (oopDesc::is_null(heap_oop)) return; 2869 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); 2870 if (obj->is_forwarded()) { 2871 // If the object has already been forwarded, we have to make sure 2872 // that it's marked. So follow the forwarding pointer. Note that 2873 // this does the right thing for self-forwarding pointers in the 2874 // evacuation failure case. 
2875 obj = obj->forwardee(); 2876 } 2877 HeapRegion* hr = _g1h->heap_region_containing(obj); 2878 if (hr != NULL) { 2879 if (hr->in_collection_set()) { 2880 if (_g1h->is_obj_ill(obj)) { 2881 _bm->mark((HeapWord*)obj); 2882 if (!push(obj)) { 2883 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed."); 2884 set_abort(); 2885 } 2886 } 2887 } else { 2888 // Outside the collection set; we need to gray it 2889 _cm->deal_with_reference(obj); 2890 } 2891 } 2892 } 2893 }; 2894 2895 class CSMarkBitMapClosure: public BitMapClosure { 2896 G1CollectedHeap* _g1h; 2897 CMBitMap* _bitMap; 2898 ConcurrentMark* _cm; 2899 CSMarkOopClosure _oop_cl; 2900 public: 2901 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) : 2902 _g1h(G1CollectedHeap::heap()), 2903 _bitMap(cm->nextMarkBitMap()), 2904 _oop_cl(cm, ms_size) 2905 {} 2906 2907 ~CSMarkBitMapClosure() {} 2908 2909 bool do_bit(size_t offset) { 2910 // convert offset into a HeapWord* 2911 HeapWord* addr = _bitMap->offsetToHeapWord(offset); 2912 assert(_bitMap->endWord() && addr < _bitMap->endWord(), 2913 "address out of range"); 2914 assert(_bitMap->isMarked(addr), "tautology"); 2915 oop obj = oop(addr); 2916 if (!obj->is_forwarded()) { 2917 if (!_oop_cl.push(obj)) return false; 2918 if (UseCompressedOops) { 2919 if (!_oop_cl.drain<narrowOop>()) return false; 2920 } else { 2921 if (!_oop_cl.drain<oop>()) return false; 2922 } 2923 } 2924 // Otherwise... 
2925 return true; 2926 } 2927 }; 2928 2929 2930 class CompleteMarkingInCSHRClosure: public HeapRegionClosure { 2931 CMBitMap* _bm; 2932 CSMarkBitMapClosure _bit_cl; 2933 enum SomePrivateConstants { 2934 MSSize = 1000 2935 }; 2936 bool _completed; 2937 public: 2938 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) : 2939 _bm(cm->nextMarkBitMap()), 2940 _bit_cl(cm, MSSize), 2941 _completed(true) 2942 {} 2943 2944 ~CompleteMarkingInCSHRClosure() {} 2945 2946 bool doHeapRegion(HeapRegion* r) { 2947 if (!r->evacuation_failed()) { 2948 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start()); 2949 if (!mr.is_empty()) { 2950 if (!_bm->iterate(&_bit_cl, mr)) { 2951 _completed = false; 2952 return true; 2953 } 2954 } 2955 } 2956 return false; 2957 } 2958 2959 bool completed() { return _completed; } 2960 }; 2961 2962 class ClearMarksInHRClosure: public HeapRegionClosure { 2963 CMBitMap* _bm; 2964 public: 2965 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { } 2966 2967 bool doHeapRegion(HeapRegion* r) { 2968 if (!r->used_region().is_empty() && !r->evacuation_failed()) { 2969 MemRegion usedMR = r->used_region(); 2970 _bm->clearRange(r->used_region()); 2971 } 2972 return false; 2973 } 2974 }; 2975 2976 void ConcurrentMark::complete_marking_in_collection_set() { 2977 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2978 2979 if (!g1h->mark_in_progress()) { 2980 g1h->g1_policy()->record_mark_closure_time(0.0); 2981 return; 2982 } 2983 2984 int i = 1; 2985 double start = os::elapsedTime(); 2986 while (true) { 2987 i++; 2988 CompleteMarkingInCSHRClosure cmplt(this); 2989 g1h->collection_set_iterate(&cmplt); 2990 if (cmplt.completed()) break; 2991 } 2992 double end_time = os::elapsedTime(); 2993 double elapsed_time_ms = (end_time - start) * 1000.0; 2994 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms); 2995 2996 ClearMarksInHRClosure clr(nextMarkBitMap()); 2997 g1h->collection_set_iterate(&clr); 2998 } 2999 3000 // The next two methods deal with the following 
// The next two methods deal with the following optimisation. Some
// objects are gray by being marked and located above the finger. If
// they are copied, during an evacuation pause, below the finger then
// they need to be pushed on the stack. The observation is that, if
// there are no regions in the collection set located above the
// finger, then the above cannot happen, hence we do not need to
// explicitly gray any objects when copying them to below the
// finger. The global stack will be scanned to ensure that, if it
// points to objects being copied, it will update their
// location. There is a tricky situation with the gray objects in
// region stack that are being copied, however. See the comment in
// newCSet().

// Called when a new collection set is about to be chosen: recompute
// the minimum of the global and per-task local fingers and reset
// _should_gray_objects accordingly.
void ConcurrentMark::newCSet() {
  if (!concurrent_marking_in_progress())
    // nothing to do if marking is not in progress
    return;

  // find what the lowest finger is among the global and local fingers
  _min_finger = _finger;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _min_finger)
      _min_finger = task_finger;
  }

  _should_gray_objects = false;

  // This fixes a very subtle and frustrating bug. It might be the case
  // that, during an evacuation pause, heap regions that contain
  // objects that are gray (by being in regions contained in the
  // region stack) are included in the collection set. Since such gray
  // objects will be moved, and because it's not easy to redirect
  // region stack entries to point to a new location (because objects
  // in one region might be scattered to multiple regions after they
  // are copied), one option is to ensure that all marked objects
  // copied during a pause are pushed on the stack. Notice, however,
  // that this problem can only happen when the region stack is not
  // empty during an evacuation pause. So, we make the fix a bit less
  // conservative and ensure that regions are pushed on the stack,
  // irrespective whether all collection set regions are below the
  // finger, if the region stack is not empty. This is expected to be
  // a rare case, so I don't think it's necessary to be smarter about it.
  if (!region_stack_empty() || has_aborted_regions())
    _should_gray_objects = true;
}

// If the newly registered collection set region extends above the
// minimum finger, copied objects may need explicit graying.
void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  if (!concurrent_marking_in_progress())
    return;

  HeapWord* region_end = hr->end();
  if (region_end > _min_finger)
    _should_gray_objects = true;
}

// Resets the region fields of active CMTasks whose values point
// into the collection set.
void ConcurrentMark::reset_active_task_region_fields_in_cset() {
  assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
  assert(parallel_marking_threads() <= _max_task_num, "sanity");

  for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL) {
      assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
      HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
      if (finger_region->in_collection_set()) {
        // The task's current region is in the collection set.
        // This region will be evacuated in the current GC and
        // the region fields in the task will be stale.
        task->giveup_current_region();
      }
    }
  }
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Empty mark stack
  clear_marking_state();
  for (int i = 0; i < (int)_max_task_num; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);
}

// Prints one line of summary timing for a marking phase: count, total
// time, average, and (when there are samples) std. dev. and max.
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

// Prints accumulated concurrent-marking timing statistics.
void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    // remark sub-phases
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                           : 0.0));
  }
  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
                         "(%8.2f s marking, %8.2f s counting).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum(),
                         cmThread()->vtime_count_accum());
}

void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}

// Closures
// XXX: there seems to be a lot of code duplication here;
// should refactor and consolidate the shared code.

// This closure is used to mark refs into the CMS generation in
// the CMS bit map. Called at the first checkpoint.

// We take a break if someone is trying to stop the world.
3153 bool ConcurrentMark::do_yield_check(int worker_i) { 3154 if (should_yield()) { 3155 if (worker_i == 0) 3156 _g1h->g1_policy()->record_concurrent_pause(); 3157 cmThread()->yield(); 3158 if (worker_i == 0) 3159 _g1h->g1_policy()->record_concurrent_pause_end(); 3160 return true; 3161 } else { 3162 return false; 3163 } 3164 } 3165 3166 bool ConcurrentMark::should_yield() { 3167 return cmThread()->should_yield(); 3168 } 3169 3170 bool ConcurrentMark::containing_card_is_marked(void* p) { 3171 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3172 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3173 } 3174 3175 bool ConcurrentMark::containing_cards_are_marked(void* start, 3176 void* last) { 3177 return 3178 containing_card_is_marked(start) && 3179 containing_card_is_marked(last); 3180 } 3181 3182 #ifndef PRODUCT 3183 // for debugging purposes 3184 void ConcurrentMark::print_finger() { 3185 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3186 _heap_start, _heap_end, _finger); 3187 for (int i = 0; i < (int) _max_task_num; ++i) { 3188 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3189 } 3190 gclog_or_tty->print_cr(""); 3191 } 3192 #endif 3193 3194 // Closure for iteration over bitmaps 3195 class CMBitMapClosure : public BitMapClosure { 3196 private: 3197 // the bitmap that is being iterated over 3198 CMBitMap* _nextMarkBitMap; 3199 ConcurrentMark* _cm; 3200 CMTask* _task; 3201 // true if we're scanning a heap region claimed by the task (so that 3202 // we move the finger along), false if we're not, i.e. currently when 3203 // scanning a heap region popped from the region stack (so that we 3204 // do not move the task finger along; it'd be a mistake if we did so). 
3205 bool _scanning_heap_region; 3206 3207 public: 3208 CMBitMapClosure(CMTask *task, 3209 ConcurrentMark* cm, 3210 CMBitMap* nextMarkBitMap) 3211 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3212 3213 void set_scanning_heap_region(bool scanning_heap_region) { 3214 _scanning_heap_region = scanning_heap_region; 3215 } 3216 3217 bool do_bit(size_t offset) { 3218 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3219 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3220 assert( addr < _cm->finger(), "invariant"); 3221 3222 if (_scanning_heap_region) { 3223 statsOnly( _task->increase_objs_found_on_bitmap() ); 3224 assert(addr >= _task->finger(), "invariant"); 3225 // We move that task's local finger along. 3226 _task->move_finger_to(addr); 3227 } else { 3228 // We move the task's region finger along. 3229 _task->move_region_finger_to(addr); 3230 } 3231 3232 _task->scan_object(oop(addr)); 3233 // we only partially drain the local queue and global stack 3234 _task->drain_local_queue(true); 3235 _task->drain_global_stack(true); 3236 3237 // if the has_aborted flag has been raised, we need to bail out of 3238 // the iteration 3239 return !_task->has_aborted(); 3240 } 3241 }; 3242 3243 // Closure for iterating over objects, currently only used for 3244 // processing SATB buffers. 
// Applies CMTask::deal_with_reference to each object (used when
// draining SATB buffers).
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

// Closure for iterating over object fields
class CMOopClosure : public OopClosure {
private:
  G1CollectedHeap*   _g1h;
  ConcurrentMark*    _cm;
  CMTask*            _task;

public:
  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    assert( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
    assert(!_g1h->is_on_master_free_list(
                    _g1h->heap_region_containing((HeapWord*) p)), "invariant");

    oop obj = oopDesc::load_decode_heap_oop(p);
    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
                             _task->task_id(), p, (void*) obj);
    _task->deal_with_reference(obj);
  }

  CMOopClosure(G1CollectedHeap* g1h,
               ConcurrentMark* cm,
               CMTask* task)
    : _g1h(g1h), _cm(cm), _task(task)
  {
    // _ref_processor is inherited from OopClosure.
    assert(_ref_processor == NULL, "should be initialized to NULL");

    if (G1UseConcMarkReferenceProcessing) {
      _ref_processor = g1h->ref_processor();
      assert(_ref_processor != NULL, "should not be NULL");
    }
  }
};

// Makes hr the task's current region and positions the local finger
// at its bottom.
void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
        "claim_region() should have filtered out continues humongous regions");
  assert(!hr->continuesHumongous(),
        "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
                           _task_id, hr);

  _curr_region  = hr;
  _finger       = hr->bottom();
  update_region_limit();
}

// Recomputes _region_limit (and possibly _finger) from the region's
// current next-top-at-mark-start; handles regions that changed
// underneath us during an evacuation pause.
void CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _task_id, bottom, limit);
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

// Abandons the task's current region (e.g. because it joined the
// collection set) by clearing all region-related fields.
void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region   = NULL;
  _finger        = NULL;
  _region_limit  = NULL;

  _region_finger = NULL;
}

// Re-initializes the task for a new marking cycle against the given
// next mark bitmap, clearing region fields, counters and stats.
void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] resetting", _task_id);

  _nextMarkBitMap                = nextMarkBitMap;
  clear_region_fields();
  assert(_aborted_region.is_empty(), "should have been cleared");

  _calls                         = 0;
  _elapsed_time_ms               = 0.0;
  _termination_time_ms           = 0.0;
  _termination_start_time_ms     = 0.0;

#if _MARKING_STATS_
  _local_pushes                  = 0;
  _local_pops                    = 0;
  _local_max_size                = 0;
  _objs_scanned                  = 0;
  _global_pushes                 = 0;
  _global_pops                   = 0;
  _global_max_size               = 0;
  _global_transfers_to           = 0;
  _global_transfers_from         = 0;
  _region_stack_pops             = 0;
  _regions_claimed               = 0;
  _objs_found_on_bitmap          = 0;
  _satb_buffers_processed        = 0;
  _steal_attempts                = 0;
  _steals                        = 0;
  _aborted                       = 0;
  _aborted_overflow              = 0;
  _aborted_cm_aborted            = 0;
  _aborted_yield                 = 0;
  _aborted_timed_out             = 0;
  _aborted_satb                  = 0;
  _aborted_termination           = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

// This determines whether the method below will check both the local
// and global fingers when determining whether to push on the stack a
// gray object (value 1) or whether it will only check the global one
// (value 0). The tradeoffs are that the former will be a bit more
// accurate and possibly push less on the stack, but it might also be
// a little bit slower.

#define _CHECK_BOTH_FINGERS_      1

// Marks obj (if in the heap and not yet marked) and decides, based on
// the local/global fingers, whether it must also be pushed on the
// task's queue to be scanned.
void CMTask::deal_with_reference(oop obj) {
  if (_cm->verbose_high())
    gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
                           _task_id, (void*) obj);

  ++_refs_reached;

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "is_in_g1_reserved should ensure this");
    HeapRegion* hr =  _g1h->heap_region_containing(obj);
    if (_g1h->is_obj_ill(obj, hr)) {
      if (_cm->verbose_high())
        gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
                               _task_id, (void*) obj);

      // we need to mark it first
      if (_nextMarkBitMap->parMark(objAddr)) {
        // No OrderAccess:store_load() is needed. It is implicit in the
        // CAS done in parMark(objAddr) above
        HeapWord* global_finger = _cm->finger();

#if _CHECK_BOTH_FINGERS_
        // we will check both the local and global fingers

        if (_finger != NULL && objAddr < _finger) {
          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
                                   "pushing it", _task_id, _finger);
          push(obj);
        } else if (_curr_region != NULL && objAddr < _region_limit) {
          // do nothing
        } else if (objAddr < global_finger) {
          // Notice that the global finger might be moving forward
          // concurrently. This is not a problem. In the worst case, we
          // mark the object while it is above the global finger and, by
          // the time we read the global finger, it has moved forward
          // passed this object. In this case, the object will probably
          // be visited when a task is scanning the region and will also
          // be pushed on the stack. So, some duplicate work, but no
          // correctness problems.

          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the global finger "
                                   "("PTR_FORMAT"), pushing it",
                                   _task_id, global_finger);
          push(obj);
        } else {
          // do nothing
        }
#else // _CHECK_BOTH_FINGERS_
        // we will only check the global finger

        if (objAddr < global_finger) {
          // see long comment above

          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the global finger "
                                   "("PTR_FORMAT"), pushing it",
                                   _task_id, global_finger);
          push(obj);
        }
#endif // _CHECK_BOTH_FINGERS_
      }
    }
  }
}

// Pushes a marked, in-heap object onto the task's local queue,
// spilling to the global mark stack if the local queue is full.
void CMTask::push(oop obj) {
  HeapWord* objAddr = (HeapWord*) obj;
  assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  assert(!_g1h->is_on_master_free_list(
              _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
  assert(!_g1h->is_obj_ill(obj), "invariant");
  assert(_nextMarkBitMap->isMarked(objAddr), "invariant");

  if (_cm->verbose_high())
    gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);

  if (!_task_queue->push(obj)) {
    // The local task queue looks full. We need to push some entries
    // to the global stack.

    if (_cm->verbose_medium())
      gclog_or_tty->print_cr("[%d] task queue overflow, "
                             "moving entries to the global stack",
                             _task_id);
    move_entries_to_global_stack();

    // this should succeed since, even if we overflow the global
    // stack, we should have definitely removed some entries from the
    // local queue. So, there must be space on it.
    bool success = _task_queue->push(obj);
    assert(success, "invariant");
  }

  statsOnly( int tmp_size = _task_queue->size();
             if (tmp_size > _local_max_size)
               _local_max_size = tmp_size;
             ++_local_pushes );
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

// Periodic check, called when the scan/ref limits are reached; sets
// the task's abort flag when overflow, Full GC, yield request, time
// quota expiry, or pending SATB buffers require us to bail out.
void CMTask::regular_clock_call() {
  if (has_aborted())
    return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent())
    return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit)
    ++_clock_due_to_scanning;
  if (_refs_reached >= _refs_reached_limit)
    ++_clock_due_to_marking;

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

// Advances the scan/ref limits by one period so the next
// regular_clock_call() fires after the usual amount of work.
void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium())
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

// Pops up to global_stack_transfer_size entries from the local queue
// and pushes them onto the global mark stack; aborts the task on
// global stack overflow.
void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size)
                   _global_max_size = tmp_size;
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

// Pops a batch of entries from the global mark stack and pushes them
// onto the (empty or near-empty) local queue.
void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium())
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size)
                 _local_max_size = tmp_size;
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

// Scans objects from the local queue until its size drops to the
// target (a fraction of capacity when partially, zero otherwise) or
// the task aborts.
void CMTask::drain_local_queue(bool partially) {
  if (has_aborted())
    return;

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  size_t target_size;
  if (partially)
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  else
    target_size = 0;

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high())
      // NOTE(review): "%d" with a size_t argument — relies on the
      // platform's size_t/int layout; confirm against gclog conventions.
      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
                             _task_id, target_size);

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high())
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted())
        ret = false;
      else
        ret = _task_queue->pop_local(obj);
    }

    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
  }
}

// Repeatedly transfers batches from the global mark stack into the
// local queue and scans them, until the global stack drops to the
// target size or the task aborts.
void CMTask::drain_global_stack(bool partially) {
  if (has_aborted())
    return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially)
    target_size = _cm->partial_mark_stack_size_target();
  else
    target_size = 0;

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
                             _task_id, target_size);

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
                             _task_id, _cm->mark_stack_size());
  }
}

// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. this is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted())
    return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads())
    satb_mq_set.set_par_closure(_task_id, &oc);
  else
    satb_mq_set.set_closure(&oc);

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  // (Continuation of CMTask::drain_satb_buffers().) The par and
  // non-par loops are identical apart from which SATBMarkQueueSet
  // entry point claims the next completed buffer.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      statsOnly( ++_satb_buffers_processed );
      // Buffer processing is potentially long; give the clock a chance
      // to check the abort conditions between buffers.
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads())
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    else
      satb_mq_set.iterate_closure_all_threads();
  }

  _draining_satb_buffers = false;

  // At remark (non-concurrent) completion without abort, every
  // completed buffer must have been consumed.
  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // De-register our closure from the queue set.
  if (G1CollectedHeap::use_parallel_gc_threads())
    satb_mq_set.set_par_closure(_task_id, NULL);
  else
    satb_mq_set.set_closure(NULL);

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

// Drains the global region stack: pops regions (resuming first from
// any region this task previously aborted out of, saved in
// _aborted_region) and iterates the marking bitmap over each one with
// the supplied closure. If the bitmap iteration is aborted mid-region,
// the unscanned remainder is recorded back into _aborted_region so it
// is rescanned when the task restarts.
void CMTask::drain_region_stack(BitMapClosure* bc) {
  if (has_aborted())
    return;

  assert(_region_finger == NULL,
         "it should be NULL when we're not scanning a region");

  if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
                             _task_id, _cm->region_stack_size());

    MemRegion mr;

    if (!_aborted_region.is_empty()) {
      // Resume with the leftover of a previously aborted region before
      // popping anything new; clear the field so it can be re-recorded.
      mr = _aborted_region;
      _aborted_region = MemRegion();

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
                               _task_id, mr.start(), mr.end());
    } else {
      mr = _cm->region_stack_pop_lock_free();
      // it returns MemRegion() if the pop fails
      statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
    }

    while (mr.start() != NULL) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] we are scanning region "
                               "["PTR_FORMAT", "PTR_FORMAT")",
                               _task_id, mr.start(), mr.end());

      assert(mr.end() <= _cm->finger(),
             "otherwise the region shouldn't be on the stack");
      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
      if (_nextMarkBitMap->iterate(bc, mr)) {
        assert(!has_aborted(),
               "cannot abort the task without aborting the bitmap iteration");

        // We finished iterating over the region without aborting.
        regular_clock_call();
        if (has_aborted())
          mr = MemRegion();
        else {
          mr = _cm->region_stack_pop_lock_free();
          // it returns MemRegion() if the pop fails
          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
        }
      } else {
        assert(has_aborted(), "currently the only way to do so");

        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _region_finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _region_finger to something non-null.
        assert(_region_finger != NULL, "invariant");

        // Make sure that any previously aborted region has been
        // cleared.
        assert(_aborted_region.is_empty(), "aborted region not cleared");

        // The iteration was actually aborted. So now _region_finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        MemRegion newRegion =
          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());

        if (!newRegion.is_empty()) {
          if (_cm->verbose_low()) {
            gclog_or_tty->print_cr("[%d] recording unscanned region"
                                   "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
                                   _task_id,
                                   newRegion.start(), newRegion.end());
          }
          // Now record the part of the region we didn't scan to
          // make sure this task scans it later.
          _aborted_region = newRegion;
        }
        // break from while
        mr = MemRegion();
      }
      _region_finger = NULL;
    }

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
                             _task_id, _cm->region_stack_size());
  }
}

// Prints the per-task marking statistics accumulated so far. The
// detailed counters below the #if are only maintained (via the
// statsOnly macro) when _MARKING_STATS_ is enabled.
void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  // (Continuation of CMTask::print_stats() under _MARKING_STATS_.)
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
                         _global_transfers_to,_global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
                         _regions_claimed, _region_stack_pops);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms) method is the building block
    of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses too, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it needs to be also
      pushed on a stack).

      (2) Local Queue. The local queue of the task which is accessed
      reasonably efficiently by the task. Other tasks can steal from
      it when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work, a task will totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and more fine-grain
      interaction with it which might cause contention. If it
      overflows, then the marking phase should restart and iterate
      over the bitmap to identify gray objects. Throughout the marking
      phase, tasks attempt to keep the global mark stack at a small
      length but not totally empty, so that entries are available for
      popping by other tasks. Only when there is no more work, tasks
      will totally drain the global mark stack.

      (4) Global Region Stack. Entries on it correspond to areas of
      the bitmap that need to be scanned since they contain gray
      objects. Pushes on the region stack only happen during
      evacuation pauses and typically correspond to areas covered by
      GC LABS. If it overflows, then the marking phase should restart
      and iterate over the bitmap to identify gray objects. Tasks will
      try to totally drain the region stack as soon as possible.

      (5) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long. During remark, all completed buffers are processed, as
      well as the filled in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (either on the global stack or the
      region stack) has been triggered. Before the task aborts, it
      will actually sync up with the other tasks to ensure that all
      the marking data structures (local queues, stacks, fingers etc.)
      are re-initialised so that when do_marking_step() completes,
      the marking phase can immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. when we have to yield then we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reach a given limit. Additional invocations to
    the method clock have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
    vtime too regularly, as it is quite expensive. So, once it was in
    place, it was natural to piggy-back all the other conditions on it
    too and not constantly check them throughout the code.
 *****************************************************************************/

// See the large comment above for the overall design. Performs one
// bounded "step" of marking work for this task: drains SATB buffers,
// the region stack, the local queue and the global mark stack, claims
// and scans heap regions, optionally steals from other tasks'
// queues, and optionally enters the termination protocol. Aborts (via
// has_aborted()) when the time target is reached, on global overflow,
// or when it needs to yield; callers invoke it in a loop until it
// completes without aborting.
void CMTask::do_marking_step(double time_target_ms,
                             bool do_stealing,
                             bool do_termination) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  assert(concurrent() || _cm->region_stack_empty(),
         "the region stack should have been cleared before remark");
  assert(concurrent() || !_cm->has_aborted_regions(),
         "aborted regions should have been cleared before remark");
  assert(_region_finger == NULL,
         "this should be non-null only when a region is being scanned");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  // Shrink the time target by the predicted overshoot of previous
  // steps so that, on average, we hit the caller's target.
  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  CMOopClosure    oop_closure(_g1h, _cm, this);
  set_oop_closure(&oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the region stack or the mark stack overflows
    // during a GC pause and this task, after a yield point,
    // restarts. We have to abort as we need to get into the overflow
    // protocol which happens right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Then totally drain the region stack. We will not look at
  // it again before the next invocation of this method. Entries on
  // the region stack are only added during evacuation pauses, for
  // which we have to yield. When we do, we abort the task anyway so
  // it will look at the region stack again when it restarts.
  bitmap_closure.set_scanning_heap_region(false);
  drain_region_stack(&bitmap_closure);
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main work loop: scan the currently-held region (if any), then
  // claim further regions, until we run out of regions or abort.
  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      bitmap_closure.set_scanning_heap_region(true);
      if (mr.is_empty() ||
          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low())
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium())
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_aborted_region.is_empty(), "only way to reach here");
      guarantee(_cm->region_stack_empty(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
      guarantee(!_cm->region_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
    } else {
      // Apparently there's more work to do. Let's abort this task. It
      // will restart it and we can hopefully find more things to do.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted())
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
    }
  } else {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
  }

  _claimed = false;
}

// Constructs a marking task bound to the given work queue within the
// shared queue set. The next-mark bitmap is set later (not here); the
// initial 0.5ms entry in _marking_step_diffs_ms seeds the step-time
// overshoot prediction used by do_marking_step().
CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _oop_closure(NULL),
    _aborted_region(MemRegion()) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

// Address range: two PTR_FORMAT values separated by '-'. The *_H_*
// header width differs per data model so that it lines up with the
// printed range — presumably 37/21 columns match two pointers plus the
// dash on LP64/ILP32; verify if the format ever changes.
#define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

// Prints the report header: the phase name and timestamp, the heap's
// committed/reserved ranges and region size, then the per-region column
// headers. The matching footer is printed by the destructor. All the
// running totals (_total_* and _hum_*) start at zero.
G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  // Column headers; each *_H_* macro must stay in sync with the
  // corresponding per-region format macro used in doHeapRegion().
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
}

// It takes as a parameter a reference to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and we visit the regions in address order.
4609 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4610 size_t* capacity_bytes, 4611 size_t* prev_live_bytes, 4612 size_t* next_live_bytes) { 4613 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4614 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4615 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4616 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4617 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4618 } 4619 4620 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4621 const char* type = ""; 4622 HeapWord* bottom = r->bottom(); 4623 HeapWord* end = r->end(); 4624 size_t capacity_bytes = r->capacity(); 4625 size_t used_bytes = r->used(); 4626 size_t prev_live_bytes = r->live_bytes(); 4627 size_t next_live_bytes = r->next_live_bytes(); 4628 double gc_eff = r->gc_efficiency(); 4629 if (r->used() == 0) { 4630 type = "FREE"; 4631 } else if (r->is_survivor()) { 4632 type = "SURV"; 4633 } else if (r->is_young()) { 4634 type = "EDEN"; 4635 } else if (r->startsHumongous()) { 4636 type = "HUMS"; 4637 4638 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4639 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4640 "they should have been zeroed after the last time we used them"); 4641 // Set up the _hum_* fields. 
4642 _hum_capacity_bytes = capacity_bytes; 4643 _hum_used_bytes = used_bytes; 4644 _hum_prev_live_bytes = prev_live_bytes; 4645 _hum_next_live_bytes = next_live_bytes; 4646 get_hum_bytes(&used_bytes, &capacity_bytes, 4647 &prev_live_bytes, &next_live_bytes); 4648 end = bottom + HeapRegion::GrainWords; 4649 } else if (r->continuesHumongous()) { 4650 type = "HUMC"; 4651 get_hum_bytes(&used_bytes, &capacity_bytes, 4652 &prev_live_bytes, &next_live_bytes); 4653 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4654 } else { 4655 type = "OLD"; 4656 } 4657 4658 _total_used_bytes += used_bytes; 4659 _total_capacity_bytes += capacity_bytes; 4660 _total_prev_live_bytes += prev_live_bytes; 4661 _total_next_live_bytes += next_live_bytes; 4662 4663 // Print a line for this particular region. 4664 _out->print_cr(G1PPRL_LINE_PREFIX 4665 G1PPRL_TYPE_FORMAT 4666 G1PPRL_ADDR_BASE_FORMAT 4667 G1PPRL_BYTE_FORMAT 4668 G1PPRL_BYTE_FORMAT 4669 G1PPRL_BYTE_FORMAT 4670 G1PPRL_DOUBLE_FORMAT, 4671 type, bottom, end, 4672 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4673 4674 return false; 4675 } 4676 4677 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4678 // Print the footer of the output. 4679 _out->print_cr(G1PPRL_LINE_PREFIX); 4680 _out->print_cr(G1PPRL_LINE_PREFIX 4681 " SUMMARY" 4682 G1PPRL_SUM_MB_FORMAT("capacity") 4683 G1PPRL_SUM_MB_PERC_FORMAT("used") 4684 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4685 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4686 bytes_to_mb(_total_capacity_bytes), 4687 bytes_to_mb(_total_used_bytes), 4688 perc(_total_used_bytes, _total_capacity_bytes), 4689 bytes_to_mb(_total_prev_live_bytes), 4690 perc(_total_prev_live_bytes, _total_capacity_bytes), 4691 bytes_to_mb(_total_next_live_bytes), 4692 perc(_total_next_live_bytes, _total_capacity_bytes)); 4693 _out->cr(); 4694 }