/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL, 0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
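  // (Addresses at finer granularity than HeapWordSize << _shifter cannot
  // be object starts, and the bit map keeps one bit per such unit, so
  // rounding up loses nothing.)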
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}
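// Illustrative sketch (not used by the VM, compiled out in the same spirit
// as the other #if 0 blocks in this file): the word <-> bit conversions
// used above amount to shifting the distance from the bitmap base by
// _shifter. The hypothetical helpers below spell this out with explicit
// parameters standing in for _bmStartWord and _shifter.
#if 0
static size_t example_heapWordToOffset(HeapWord* bmStartWord, int shifter,
                                       HeapWord* addr) {
  // pointer_delta() returns the distance in HeapWords.
  return pointer_delta(addr, bmStartWord) >> shifter;
}

static HeapWord* example_offsetToHeapWord(HeapWord* bmStartWord, int shifter,
                                          size_t offset) {
  return bmStartWord + (offset << shifter);
}
#endif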
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}


CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
}

void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
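    // A successful CAS on _index below claims slot 'index' for this
    // thread. Note that the store into _base[index] happens after the
    // CAS; this is only safe because pushes are never interleaved with
    // the lock-free pops (see the comment on pop_lock_free()).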
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
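    // (This is the usual grey-object drain: each popped object is grey,
    // i.e. marked but not yet scanned, and the closure below is expected
    // to mark and push any unvisited objects it finds, so the loop
    // terminates once everything reachable from the stack is scanned.)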
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm(max_regions, false /* in_resource_area */),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area */),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                             (int) _parallel_marking_threads,
                                             false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
  FREE_C_HEAP_ARRAY(double, _accum_task_vtime);
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread appears to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    true /* use_prev_marking */, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, and we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,    // activate StrongRootsScope
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,      // do not visit active blobs
                            &older     // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   do not do this, then the following deadlock can occur: one
   thread could be in the barrier sync code, waiting for the other
   thread to also sync up, whereas another one could be trying to
   yield, while also waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible for it to be suspended while a Full GC or an evacuation
   pause occurs. This is actually safe, since entering the sync
   barrier is one of the last things do_marking_step() does, and it
   doesn't manipulate any data structures afterwards.
*/

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflowed.");
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail out
    // because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess::store_load() is needed.
      // It is implicit in the CAS done in parMark(addr) above.
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.
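  // The actual work is handed to the parallel worker gang below when we
  // have one; with no parallel marking threads the ConcurrentMark thread
  // simply executes the task body itself via markingTask.work(0).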

  _restart_for_overflow = false;

  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(true, false, true);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, and we expect all
    // threads to have SATB queues with active set to true.
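    // (The queues were made active for all threads at the start of the
    // cycle, in checkpointRootsInitialPost(); the expected_active
    // argument lets the queue set check that invariant.)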
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */      true,
                               /* silent */           false,
                               /* use_prev_marking */ false);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}


#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm), _final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded. It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set. We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset() { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};


void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 final counting"), _g1h(g1h),
    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  {
    if (ParallelGCThreads > 0)
      _n_workers = _g1h->workers()->total_workers();
    else
      _n_workers = 1;
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task);
  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(int i) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("  Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           int worker_num,
                           FreeRegionList* local_cleanup_list,
                           HumongousRegionSet* humongous_proxy_set,
                           HRRSCleanupTask* hrrs_cleanup_task)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  hr->reset_gc_time_stamp();
  if (!hr->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    hr->note_end_of_marking();
    _max_live_bytes += hr->max_live_bytes();
    _g1->free_region_if_empty(hr,
                              &_freed_bytes,
                              _local_cleanup_list,
                              _humongous_proxy_set,
                              _hrrs_cleanup_task,
                              true /* par */);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) _max_region_time = region_time;
  }
  return false;
}

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
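  // (The aborted flag is set when a Full GC interrupts the cycle; the
  // same check guards checkpointRootsFinal() above.)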
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent */ false,
                     /* prev marking */ true);
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                        &_region_bm, &_card_bm);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(
                                               HeapRegion::InitialClaimValue),
           "sanity check");

    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_count_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(
                                               HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    g1_par_count_task.work(0);
  }

  size_t known_garbage_bytes =
    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
#if 0
  gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
                         (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
                         (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
                         (double) known_garbage_bytes / (double) (1024 * 1024));
#endif // 0
  g1p->set_known_garbage_bytes(known_garbage_bytes);

  size_t start_used_bytes = g1h->used();
  _at_least_one_mark_complete = true;
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("Cleanup:");
    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
                           this_final_counting_time*1000.0);
  }
  _total_counting_time += this_final_counting_time;

  // Install newly created mark bitmap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  double note_end_start = os::elapsedTime();
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }
  double note_end_end = os::elapsedTime();
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
                           (note_end_end - note_end_start)*1000.0);
  }

  // The rem set scrubbing below must happen before the
  // record_concurrent_mark_cleanup_end()
  // call below, since it affects the metric by which we sort the heap
  // regions.
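  // (Scrubbing walks the remembered sets and, guided by the liveness
  // information collected in _region_bm and _card_bm during counting,
  // drops entries that correspond to dead regions and cards.)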
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      int n_workers = g1h->workers()->total_workers();
      g1h->set_par_threads(n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                                 HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end(
                        g1_par_note_end_task.freed_bytes(),
                        g1_par_note_end_task.max_live_bytes());

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // G1CollectedHeap::heap()->print();
  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  //                        G1CollectedHeap::heap()->get_gc_time_stamp());

  if (PrintGC || PrintGCDetails) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent */ false,
                     /* prev marking */ true);
  }

  g1h->verify_region_sets_optional();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has "SIZE_FORMAT" entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->rem_set()->clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending "SIZE_FORMAT" entries to the "
                               "secondary_free_list, clean list still has "
                               SIZE_FORMAT" entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Support closures for reference processing in G1

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

class G1CMKeepAliveClosure: public OopClosure {
  G1CollectedHeap* _g1;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitMap;
 public:
  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
                       CMBitMap* bitMap) :
    _g1(g1), _cm(cm),
    _bitMap(bitMap) {}

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    HeapWord* addr = (HeapWord*)obj;

    if (_cm->verbose_high())
      gclog_or_tty->print_cr("\t[0] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
                             p, (void*) obj);

    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
      _bitMap->mark(addr);
      _cm->mark_stack_push(obj);
    }
  }
};

class G1CMDrainMarkingStackClosure: public VoidClosure {
  CMMarkStack*          _markStack;
  CMBitMap*             _bitMap;
  G1CMKeepAliveClosure* _oopClosure;
 public:
  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
                               G1CMKeepAliveClosure* oopClosure) :
    _markStack(markStack),
    _bitMap(bitMap),
    _oopClosure(oopClosure)
  {}

  void do_void() {
    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  }
};

// 'Keep Alive' closure used by parallel reference processing.
// An instance of this closure is used in the parallel reference processing
// code rather than an instance of G1CMKeepAliveClosure. We could have used
// the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
// placed on to discovered ref lists only once, so we can mark and push with
// no need to check whether the object has already been marked. Using the
// G1CMKeepAliveClosure would mean, however, having all the worker threads
// operating on the global mark stack. This means that an individual
// worker would be doing lock-free pushes while it processes its own
// discovered ref list followed by drain call. If the discovered ref lists
// are unbalanced then this could cause interference with the other
// workers. Using a CMTask (and its embedded local data structures)
// avoids that potential interference.
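//
// In sketch form (pseudo-code, names illustrative), each worker running
// reference processing does roughly:
//
//   for (each ref on this worker's discovered list) {
//     keep_alive.do_oop(ref);        // CMTask::deal_with_reference()
//     if (G1RefProcDrainInterval refs processed since last drain)
//       task->do_marking_step(...);  // drain the task-local structures
//   }
//   drain_closure.do_void();         // final, complete drain
//
// i.e. marking work is interleaved with reference processing rather than
// funnelled entirely through the global mark stack.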
class G1CMParKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark*  _cm;
  CMTask*          _task;
  CMBitMap*        _bitMap;
  int              _ref_counter_limit;
  int              _ref_counter;
 public:
  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
                                  CMTask* task,
                                  CMBitMap* bitMap) :
    _cm(cm), _task(task), _bitMap(bitMap),
    _ref_counter_limit(G1RefProcDrainInterval)
  {
    assert(_ref_counter_limit > 0, "sanity");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high())
        gclog_or_tty->print_cr("\t[%d] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->task_id(), p, (void*) obj);

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call do_marking_step() to process
        // these entries. We call the routine in a loop, which we'll exit
        // if there's nothing more to do (i.e. we're done with the entries
        // that we've pushed as a result of the deal_with_reference calls
        // above) or we overflow.
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment
        // at the beginning of CMTask::do_marking_step() for those
        // conditions - one of which is reaching the specified time
        // target.) It is only when CMTask::do_marking_step() returns
        // without setting the has_aborted() flag that the marking has
        // completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_stealing    */,
                                 false /* do_termination */);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high())
        gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
    }
  }
};

class G1CMParDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask*         _task;
 public:
  G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task)
  {}

  void do_void() {
    do {
      if (_cm->verbose_high())
        gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step",
                               _task->task_id());

      // We call CMTask::do_marking_step() to completely drain the local
      // and global marking stacks. The routine is called in a loop, which
      // we'll exit if there's nothing more to do (i.e. we've completely
      // drained the entries that were pushed as a result of applying the
      // G1CMParKeepAliveAndDrainClosure to the entries on the discovered
      // ref lists above) or we overflow the global marking stack.
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment
      // at the beginning of CMTask::do_marking_step() for those
      // conditions - one of which is reaching the specified time target.)
      // It is only when CMTask::do_marking_step() returns without setting
      // the has_aborted() flag that the marking has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_stealing    */,
                             true /* do_termination */);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for G1
class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitmap;
  WorkGang*        _workers;
  int              _active_workers;

public:
  G1RefProcTaskExecutor(G1CollectedHeap* g1h,
                        ConcurrentMark* cm,
                        CMBitMap* bitmap,
                        WorkGang* workers,
                        int n_workers) :
    _g1h(g1h), _cm(cm), _bitmap(bitmap),
    _workers(workers), _active_workers(n_workers)
  { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1RefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&     _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitmap;

public:
  G1RefProcTaskProxy(ProcessTask& proc_task,
                     G1CollectedHeap* g1h,
                     ConcurrentMark* cm,
                     CMBitMap* bitmap) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
  {}

  virtual void work(int i) {
    CMTask* marking_task = _cm->task(i);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
    G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);

    _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);

  // We need to reset the phase for each task execution so that
  // the termination protocol of CMTask::do_marking_step works.
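  // (set_phase() tells ConcurrentMark how many tasks will take part;
  // do_marking_step()'s termination protocol only completes once that
  // many tasks have entered it, so the count must match the number of
  // workers that will run the proxy task below.)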
  _cm->set_phase(_active_workers, false /* concurrent */);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

class G1RefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task)
  { }

  virtual void work(int i) {
    _enq_task.work(i);
  }
};

void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1RefEnqueueTaskProxy enq_task_proxy(enq_task);

  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  ReferenceProcessor* rp = g1h->ref_processor();

  // See the comment in G1CollectedHeap::ref_processing_init()
  // about how reference processing currently works in G1.

  // Process weak references.
  rp->setup_policy(clear_all_soft_refs);
  assert(_markStack.isEmpty(), "mark stack should be empty");

  G1CMIsAliveClosure   g1_is_alive(g1h);
  G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
  G1CMDrainMarkingStackClosure
    g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
  // We use the work gang from the G1CollectedHeap and we utilize all
  // the worker threads.
  int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
  active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);

  G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
                                          g1h->workers(), active_workers);

  if (rp->processing_is_mt()) {
    // Set the degree of MT here. If the discovery is done MT, there
    // may have been a different number of threads doing the discovery
    // and a different number of discovered lists may have Ref objects.
    // That is OK as long as the Reference lists are balanced (see
    // balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);

    rp->process_discovered_references(&g1_is_alive,
                                      &g1_keep_alive,
                                      &g1_drain_mark_stack,
                                      &par_task_executor);

    // The work routines of the parallel keep_alive and drain_marking_stack
    // closures will set the has_overflown flag if we overflow the global
    // marking stack.
  } else {
    rp->process_discovered_references(&g1_is_alive,
                                      &g1_keep_alive,
                                      &g1_drain_mark_stack,
                                      NULL);
  }

  assert(_markStack.overflow() || _markStack.isEmpty(),
         "mark stack should be empty (unless it overflowed)");
  if (_markStack.overflow()) {
    // This should have been done already when we tried to push an
    // entry on to the global mark stack. But let's do it again.
    set_has_overflown();
  }

  if (rp->processing_is_mt()) {
    assert(rp->num_q() == active_workers, "why not");
    rp->enqueue_discovered_references(&par_task_executor);
  } else {
    rp->enqueue_discovered_references();
  }

  rp->verify_no_references_recorded();
  assert(!rp->discovery_enabled(), "should have been disabled");

  // Now clean up stale oops in StringTable
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in symbol table.
  SymbolTable::unlink();
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark *_cm;

public:
  void work(int worker_i) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if ((size_t)worker_i < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_i);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_stealing    */,
                              true /* do_termination */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm) :
    AbstractGangTask("Par Remark"), _cm(cm) { }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = ParallelGCThreads;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = 1;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}

#ifndef PRODUCT

class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  CMBitMapRO*      _bitmap;
  outputStream*    _out;
  bool             _use_prev_marking;
  bool             _all;

public:
  PrintReachableOopClosure(CMBitMapRO*   bitmap,
                           outputStream* out,
                           bool          use_prev_marking,
                           bool          all) :
    _g1h(G1CollectedHeap::heap()),
    _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop         obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = false;
      if (_use_prev_marking) {
        over_tams = hr->obj_allocated_since_prev_marking(obj);
      } else {
        over_tams = hr->obj_allocated_since_next_marking(obj);
      }
      bool marked = _bitmap->isMarked((HeapWord*) obj);

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

class PrintReachableObjectClosure : public ObjectClosure {
private:
  CMBitMapRO*   _bitmap;
  outputStream* _out;
  bool          _use_prev_marking;
  bool          _all;
  HeapRegion*   _hr;

public:
  PrintReachableObjectClosure(CMBitMapRO*   bitmap,
                              outputStream* out,
                              bool          use_prev_marking,
                              bool          all,
                              HeapRegion*   hr) :
    _bitmap(bitmap), _out(out),
    _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams;
    if (_use_prev_marking) {
      over_tams = _hr->obj_allocated_since_prev_marking(o);
    } else {
      over_tams = _hr->obj_allocated_since_next_marking(o);
    }
    bool marked = _bitmap->isMarked((HeapWord*) o);
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ?
" M" : ""); 2371 PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all); 2372 o->oop_iterate(&oopCl); 2373 } 2374 } 2375 }; 2376 2377 class PrintReachableRegionClosure : public HeapRegionClosure { 2378 private: 2379 CMBitMapRO* _bitmap; 2380 outputStream* _out; 2381 bool _use_prev_marking; 2382 bool _all; 2383 2384 public: 2385 bool doHeapRegion(HeapRegion* hr) { 2386 HeapWord* b = hr->bottom(); 2387 HeapWord* e = hr->end(); 2388 HeapWord* t = hr->top(); 2389 HeapWord* p = NULL; 2390 if (_use_prev_marking) { 2391 p = hr->prev_top_at_mark_start(); 2392 } else { 2393 p = hr->next_top_at_mark_start(); 2394 } 2395 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2396 "TAMS: "PTR_FORMAT, b, e, t, p); 2397 _out->cr(); 2398 2399 HeapWord* from = b; 2400 HeapWord* to = t; 2401 2402 if (to > from) { 2403 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2404 _out->cr(); 2405 PrintReachableObjectClosure ocl(_bitmap, _out, 2406 _use_prev_marking, _all, hr); 2407 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2408 _out->cr(); 2409 } 2410 2411 return false; 2412 } 2413 2414 PrintReachableRegionClosure(CMBitMapRO* bitmap, 2415 outputStream* out, 2416 bool use_prev_marking, 2417 bool all) : 2418 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { } 2419 }; 2420 2421 void ConcurrentMark::print_reachable(const char* str, 2422 bool use_prev_marking, 2423 bool all) { 2424 gclog_or_tty->cr(); 2425 gclog_or_tty->print_cr("== Doing heap dump... "); 2426 2427 if (G1PrintReachableBaseFile == NULL) { 2428 gclog_or_tty->print_cr(" #### error: no base file defined"); 2429 return; 2430 } 2431 2432 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2433 (JVM_MAXPATHLEN - 1)) { 2434 gclog_or_tty->print_cr(" #### error: file name too long"); 2435 return; 2436 } 2437 2438 char file_name[JVM_MAXPATHLEN]; 2439 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2440 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2441 2442 fileStream fout(file_name); 2443 if (!fout.is_open()) { 2444 gclog_or_tty->print_cr(" #### error: could not open file"); 2445 return; 2446 } 2447 2448 outputStream* out = &fout; 2449 2450 CMBitMapRO* bitmap = NULL; 2451 if (use_prev_marking) { 2452 bitmap = _prevMarkBitMap; 2453 } else { 2454 bitmap = _nextMarkBitMap; 2455 } 2456 2457 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS"); 2458 out->cr(); 2459 2460 out->print_cr("--- ITERATING OVER REGIONS"); 2461 out->cr(); 2462 PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all); 2463 _g1h->heap_region_iterate(&rcl); 2464 out->cr(); 2465 2466 gclog_or_tty->print_cr(" done"); 2467 gclog_or_tty->flush(); 2468 } 2469 2470 #endif // PRODUCT 2471 2472 // This note is for drainAllSATBBuffers and the code in between. 2473 // In the future we could reuse a task to do this work during an 2474 // evacuation pause (since now tasks are not active and can be claimed 2475 // during an evacuation pause). This was a late change to the code and 2476 // is currently not being taken advantage of. 

class CMGlobalObjectClosure : public ObjectClosure {
private:
  ConcurrentMark* _cm;

public:
  void do_object(oop obj) {
    _cm->deal_with_reference(obj);
  }

  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
};

void ConcurrentMark::deal_with_reference(oop obj) {
  if (verbose_high())
    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
                           (void*) obj);

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "is_in_g1_reserved should ensure this");
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (_g1h->is_obj_ill(obj, hr)) {
      if (verbose_high())
        gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
                               "marked", (void*) obj);

      // we need to mark it first
      if (_nextMarkBitMap->parMark(objAddr)) {
        // No OrderAccess::store_load() is needed. It is implicit in the
        // CAS done in parMark(objAddr) above
        HeapWord* finger = _finger;
        if (objAddr < finger) {
          if (verbose_high())
            gclog_or_tty->print_cr("[global] below the global finger "
                                   "("PTR_FORMAT"), pushing it", finger);
          if (!mark_stack_push(obj)) {
            if (verbose_low())
              gclog_or_tty->print_cr("[global] global stack overflow during "
                                     "deal_with_reference");
          }
        }
      }
    }
  }
}

void ConcurrentMark::drainAllSATBBuffers() {
  CMGlobalObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_closure(&oc);

  while (satb_mq_set.apply_closure_to_completed_buffer()) {
    if (verbose_medium())
      gclog_or_tty->print_cr("[global] processed an SATB buffer");
  }

  // no need to check whether we should do this, as this is only
  // called during an evacuation pause
  satb_mq_set.iterate_closure_all_threads();

  satb_mq_set.set_closure(NULL);
  assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
}

void ConcurrentMark::markPrev(oop p) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
}

void ConcurrentMark::clear(oop p) {
  assert(p != NULL && p->is_oop(), "expected an oop");
  HeapWord* addr = (HeapWord*)p;
  assert(addr >= _nextMarkBitMap->startWord() &&
         addr < _nextMarkBitMap->endWord(), "in a region");

  _nextMarkBitMap->clear(addr);
}

void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  _nextMarkBitMap->clearRange(mr);
}

HeapRegion*
ConcurrentMark::claim_region(int task_num) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // is the gap between reading the finger and doing the CAS too long?
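    // (Even if it is, correctness is preserved: a stale finger value just
    // makes the cmpxchg below fail, and we re-read _finger and retry.)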

    HeapRegion* curr_region = _g1h->heap_region_containing(finger);
    HeapWord*   bottom      = curr_region->bottom();
    HeapWord*   end         = curr_region->end();
    HeapWord*   limit       = curr_region->next_top_at_mark_start();

    if (verbose_low())
      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             task_num, curr_region, bottom, end, limit);

    HeapWord* res =
      (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low())
        gclog_or_tty->print_cr("[%d] we were successful with region = "
                               PTR_FORMAT, task_num, curr_region);

      if (limit > bottom) {
        if (verbose_low())
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
                                 "returning it ", task_num, curr_region);
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low())
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                 "returning NULL", task_num, curr_region);
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low())
        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               task_num, _finger, finger);

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
  bool result = false;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = _g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        the_task->set_aborted_region(MemRegion());
        result = true;
      }
    }
  }
  return result;
}

bool ConcurrentMark::has_aborted_regions() {
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return true;
    }
  }
  return false;
}

void ConcurrentMark::oops_do(OopClosure* cl) {
  if (_markStack.size() > 0 && verbose_low())
    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
                           "size = %d", _markStack.size());
  // we first iterate over the contents of the mark stack...
  _markStack.oops_do(cl);

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue((int)i);

    if (queue->size() > 0 && verbose_low())
      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
                             "size = %d", i, queue->size());

    // ...then over the contents of all the task queues.
    queue->oops_do(cl);
  }

  // Invalidate any entries in the region stack that
  // point into the collection set
  if (_regionStack.invalidate_entries_into_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }

  // Invalidate any aborted regions, recorded in the individual CM
  // tasks, that point into the collection set.
  if (invalidate_aborted_regions_in_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }
}

void ConcurrentMark::clear_marking_state() {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  _regionStack.setEmpty();
  _regionStack.clear_overflow();
  clear_has_overflown();
  _finger = _heap_start;

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
    // Clear any partial regions from the CMTasks
    _tasks[i]->clear_aborted_region();
  }
}

void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

class CSMarkOopClosure: public OopClosure {
  friend class CSMarkBitMapClosure;

  G1CollectedHeap* _g1h;
  CMBitMap*        _bm;
  ConcurrentMark*  _cm;
  oop*             _ms;
  jint*            _array_ind_stack;
  int              _ms_size;
  int              _ms_ind;
  int              _array_increment;

  bool push(oop obj, int arr_ind = 0) {
    if (_ms_ind == _ms_size) {
      gclog_or_tty->print_cr("Mark stack is full.");
      return false;
    }
    _ms[_ms_ind] = obj;
    if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
    _ms_ind++;
    return true;
  }

  oop pop() {
    if (_ms_ind == 0) return NULL;
    else {
      _ms_ind--;
      return _ms[_ms_ind];
    }
  }

  template <class T> bool drain() {
    while (_ms_ind > 0) {
      oop obj = pop();
      assert(obj != NULL, "Since index was non-zero.");
      if (obj->is_objArray()) {
        jint arr_ind = _array_ind_stack[_ms_ind];
        objArrayOop aobj = objArrayOop(obj);
        jint len = aobj->length();
        jint next_arr_ind = arr_ind + _array_increment;
        if (next_arr_ind < len) {
          push(obj, next_arr_ind);
        }
        // Now process this portion of this one.
        int lim = MIN2(next_arr_ind, len);
        for (int j = arr_ind; j < lim; j++) {
          do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
        }

      } else {
        obj->oop_iterate(this);
      }
      if (abort()) return false;
    }
    return true;
  }

public:
  CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
    _g1h(G1CollectedHeap::heap()),
    _cm(cm),
    _bm(cm->nextMarkBitMap()),
    _ms_size(ms_size), _ms_ind(0),
    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
    _array_increment(MAX2(ms_size/8, 16))
  {}

  ~CSMarkOopClosure() {
    FREE_C_HEAP_ARRAY(oop, _ms);
    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (oopDesc::is_null(heap_oop)) return;
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (obj->is_forwarded()) {
      // If the object has already been forwarded, we have to make sure
      // that it's marked. So follow the forwarding pointer. Note that
      // this does the right thing for self-forwarding pointers in the
      // evacuation failure case.
      obj = obj->forwardee();
    }
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (hr != NULL) {
      if (hr->in_collection_set()) {
        if (_g1h->is_obj_ill(obj)) {
          _bm->mark((HeapWord*)obj);
          if (!push(obj)) {
            gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
            set_abort();
          }
        }
      } else {
        // Outside the collection set; we need to gray it
        _cm->deal_with_reference(obj);
      }
    }
  }
};

class CSMarkBitMapClosure: public BitMapClosure {
  G1CollectedHeap* _g1h;
  CMBitMap*        _bitMap;
  ConcurrentMark*  _cm;
  CSMarkOopClosure _oop_cl;
public:
  CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
    _g1h(G1CollectedHeap::heap()),
    _bitMap(cm->nextMarkBitMap()),
    _oop_cl(cm, ms_size)
  {}

  ~CSMarkBitMapClosure() {}

  bool do_bit(size_t offset) {
    // convert offset into a HeapWord*
    HeapWord* addr = _bitMap->offsetToHeapWord(offset);
    assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
           "address out of range");
    assert(_bitMap->isMarked(addr), "tautology");
    oop obj = oop(addr);
    if (!obj->is_forwarded()) {
      if (!_oop_cl.push(obj)) return false;
      if (UseCompressedOops) {
        if (!_oop_cl.drain<narrowOop>()) return false;
      } else {
        if (!_oop_cl.drain<oop>()) return false;
      }
    }
    // Otherwise...
    return true;
  }
};


class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  CMBitMap* _bm;
  CSMarkBitMapClosure _bit_cl;
  enum SomePrivateConstants {
    MSSize = 1000
  };
  bool _completed;
public:
  CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
    _bm(cm->nextMarkBitMap()),
    _bit_cl(cm, MSSize),
    _completed(true)
  {}

  ~CompleteMarkingInCSHRClosure() {}

  bool doHeapRegion(HeapRegion* r) {
    if (!r->evacuation_failed()) {
      MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
      if (!mr.is_empty()) {
        if (!_bm->iterate(&_bit_cl, mr)) {
          _completed = false;
          return true;
        }
      }
    }
    return false;
  }

  bool completed() { return _completed; }
};

class ClearMarksInHRClosure: public HeapRegionClosure {
  CMBitMap* _bm;
public:
  ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }

  bool doHeapRegion(HeapRegion* r) {
    if (!r->used_region().is_empty() && !r->evacuation_failed()) {
      MemRegion usedMR = r->used_region();
      _bm->clearRange(usedMR);
    }
    return false;
  }
};

void ConcurrentMark::complete_marking_in_collection_set() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  if (!g1h->mark_in_progress()) {
    g1h->g1_policy()->record_mark_closure_time(0.0);
    return;
  }

  int i = 1;
  double start = os::elapsedTime();
  while (true) {
    i++;
    CompleteMarkingInCSHRClosure cmplt(this);
    g1h->collection_set_iterate(&cmplt);
    if (cmplt.completed()) break;
  }
  double end_time = os::elapsedTime();
  double elapsed_time_ms = (end_time - start) * 1000.0;
  g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);

  ClearMarksInHRClosure clr(nextMarkBitMap());
  g1h->collection_set_iterate(&clr);
}

// The next two methods deal with the following optimisation. Some
// objects are gray by being marked and located above the finger. If
// they are copied, during an evacuation pause, below the finger then
// they need to be pushed on the stack. The observation is that, if
// there are no regions in the collection set located above the
// finger, then the above cannot happen, hence we do not need to
// explicitly gray any objects when copying them to below the
// finger. The global stack will be scanned to ensure that, if it
// points to objects being copied, it will update their
// location. There is a tricky situation with the gray objects in
// the region stack that are being copied, however. See the comment in
// newCSet().

void ConcurrentMark::newCSet() {
  if (!concurrent_marking_in_progress())
    // nothing to do if marking is not in progress
    return;

  // find what the lowest finger is among the global and local fingers
  _min_finger = _finger;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _min_finger)
      _min_finger = task_finger;
  }

  _should_gray_objects = false;

  // This fixes a very subtle and frustrating bug. It might be the case
  // that, during an evacuation pause, heap regions that contain
  // objects that are gray (by being in regions contained in the
  // region stack) are included in the collection set.
  // Since such gray objects will be moved, and because it's not easy to
  // redirect region stack entries to point to a new location (because
  // objects in one region might be scattered to multiple regions after
  // they are copied), one option is to ensure that all marked objects
  // copied during a pause are pushed on the stack. Notice, however,
  // that this problem can only happen when the region stack is not
  // empty during an evacuation pause. So, we make the fix a bit less
  // conservative and ensure that regions are pushed on the stack,
  // irrespective of whether all collection set regions are below the
  // finger, if the region stack is not empty. This is expected to be
  // a rare case, so I don't think it's necessary to be smarter about it.
  if (!region_stack_empty() || has_aborted_regions())
    _should_gray_objects = true;
}

void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  if (!concurrent_marking_in_progress())
    return;

  HeapWord* region_end = hr->end();
  if (region_end > _min_finger)
    _should_gray_objects = true;
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Empty mark stack
  clear_marking_state();
  for (int i = 0; i < (int)_max_task_num; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);
}

static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ?
                          _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                          : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ?
                            _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                            : 0.0));
  }
  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
                         "(%8.2f s marking, %8.2f s counting).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum(),
                         cmThread()->vtime_count_accum());
}

void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}

// Closures
// XXX: there seems to be a lot of code duplication here;
// should refactor and consolidate the shared code.

// We take a break if someone is trying to stop the world.
bool ConcurrentMark::do_yield_check(int worker_i) {
  if (should_yield()) {
    if (worker_i == 0)
      _g1h->g1_policy()->record_concurrent_pause();
    cmThread()->yield();
    if (worker_i == 0)
      _g1h->g1_policy()->record_concurrent_pause_end();
    return true;
  } else {
    return false;
  }
}

bool ConcurrentMark::should_yield() {
  return cmThread()->should_yield();
}

bool ConcurrentMark::containing_card_is_marked(void* p) {
  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
}

bool ConcurrentMark::containing_cards_are_marked(void* start,
                                                 void* last) {
  return containing_card_is_marked(start) &&
         containing_card_is_marked(last);
}

#ifndef PRODUCT
// for debugging purposes
void ConcurrentMark::print_finger() {
  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
                         _heap_start, _heap_end, _finger);
  for (int i = 0; i < (int) _max_task_num; ++i) {
    gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  }
  gclog_or_tty->print_cr("");
}
#endif

// Closure for iteration over bitmaps
class CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  CMBitMap*       _nextMarkBitMap;
  ConcurrentMark* _cm;
  CMTask*         _task;
  // true if we're scanning a heap region claimed by the task (so that
  // we move the finger along), false if we're not, i.e. currently when
  // scanning a heap region popped from the region stack (so that we
  // do not move the task finger along; it'd be a mistake if we did so).
  bool            _scanning_heap_region;

public:
  CMBitMapClosure(CMTask *task,
                  ConcurrentMark* cm,
                  CMBitMap* nextMarkBitMap)
    :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  void set_scanning_heap_region(bool scanning_heap_region) {
    _scanning_heap_region = scanning_heap_region;
  }

  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert( addr < _cm->finger(), "invariant");

    if (_scanning_heap_region) {
      statsOnly( _task->increase_objs_found_on_bitmap() );
      assert(addr >= _task->finger(), "invariant");
      // We move that task's local finger along.
      _task->move_finger_to(addr);
    } else {
      // We move the task's region finger along.
      _task->move_region_finger_to(addr);
    }

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

// Closure for iterating over objects, currently only used for
// processing SATB buffers.
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

// Closure for iterating over object fields
class CMOopClosure : public OopClosure {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  CMTask*          _task;

public:
  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    assert( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
    assert(!_g1h->is_on_master_free_list(
                    _g1h->heap_region_containing((HeapWord*) p)), "invariant");

    oop obj = oopDesc::load_decode_heap_oop(p);
    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
                             _task->task_id(), p, (void*) obj);
    _task->deal_with_reference(obj);
  }

  CMOopClosure(G1CollectedHeap* g1h,
               ConcurrentMark* cm,
               CMTask* task)
    : _g1h(g1h), _cm(cm), _task(task)
  {
    _ref_processor = g1h->ref_processor();
    assert(_ref_processor != NULL, "should not be NULL");
  }
};

void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
         "claim_region() should have filtered out continues humongous regions");
  assert(!hr->continuesHumongous(),
         "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
                           _task_id, hr);

  _curr_region = hr;
  _finger      = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr   = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit  = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _task_id, bottom, limit);
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom).
    // We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region   = NULL;
  _finger        = NULL;
  _region_limit  = NULL;

  _region_finger = NULL;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] resetting", _task_id);

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();
  assert(_aborted_region.is_empty(), "should have been cleared");

  _calls                     = 0;
  _elapsed_time_ms           = 0.0;
  _termination_time_ms       = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _local_pushes           = 0;
  _local_pops             = 0;
  _local_max_size         = 0;
  _objs_scanned           = 0;
  _global_pushes          = 0;
  _global_pops            = 0;
  _global_max_size        = 0;
  _global_transfers_to    = 0;
  _global_transfers_from  = 0;
  _region_stack_pops      = 0;
  _regions_claimed        = 0;
  _objs_found_on_bitmap   = 0;
  _satb_buffers_processed = 0;
  _steal_attempts         = 0;
  _steals                 = 0;
  _aborted                = 0;
  _aborted_overflow       = 0;
  _aborted_cm_aborted     = 0;
  _aborted_yield          = 0;
  _aborted_timed_out      = 0;
  _aborted_satb           = 0;
  _aborted_termination    = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

// This determines whether the method below will check both the local
// and global fingers when determining whether to push on the stack a
// gray object (value 1) or whether it will only check the global one
// (value 0). The tradeoffs are that the former will be a bit more
// accurate and possibly push less on the stack, but it might also be
// a little bit slower.
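//
// A sketch with illustrative addresses: suppose a task's local finger is
// at 0x1000 and the global finger is at 0x3000. An object at 0x0800 is
// below the local finger and gets pushed in both modes. An object at
// 0x2000 inside the task's current region (below _region_limit) is
// skipped with value 1, since the task's own bitmap scan will reach it
// anyway, but pushed with value 0, as it is below the global finger -
// hence value 1 tends to push less on the stack.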
3332 3333 #define _CHECK_BOTH_FINGERS_ 1 3334 3335 void CMTask::deal_with_reference(oop obj) { 3336 if (_cm->verbose_high()) 3337 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT, 3338 _task_id, (void*) obj); 3339 3340 ++_refs_reached; 3341 3342 HeapWord* objAddr = (HeapWord*) obj; 3343 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error"); 3344 if (_g1h->is_in_g1_reserved(objAddr)) { 3345 assert(obj != NULL, "is_in_g1_reserved should ensure this"); 3346 HeapRegion* hr = _g1h->heap_region_containing(obj); 3347 if (_g1h->is_obj_ill(obj, hr)) { 3348 if (_cm->verbose_high()) 3349 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked", 3350 _task_id, (void*) obj); 3351 3352 // we need to mark it first 3353 if (_nextMarkBitMap->parMark(objAddr)) { 3354 // No OrderAccess::store_load() is needed. It is implicit in the 3355 // CAS done in parMark(objAddr) above. 3356 HeapWord* global_finger = _cm->finger(); 3357 3358 #if _CHECK_BOTH_FINGERS_ 3359 // we will check both the local and global fingers 3360 3361 if (_finger != NULL && objAddr < _finger) { 3362 if (_cm->verbose_high()) 3363 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), " 3364 "pushing it", _task_id, _finger); 3365 push(obj); 3366 } else if (_curr_region != NULL && objAddr < _region_limit) { 3367 // do nothing 3368 } else if (objAddr < global_finger) { 3369 // Notice that the global finger might be moving forward 3370 // concurrently. This is not a problem. In the worst case, we 3371 // mark the object while it is above the global finger and, by 3372 // the time we read the global finger, it has moved forward 3373 // past this object. In this case, the object will probably 3374 // be visited when a task is scanning the region and will also 3375 // be pushed on the stack. So, some duplicate work, but no 3376 // correctness problems. 3377 3378 if (_cm->verbose_high()) 3379 gclog_or_tty->print_cr("[%d] below the global finger " 3380 "("PTR_FORMAT"), pushing it", 3381 _task_id, global_finger); 3382 push(obj); 3383 } else { 3384 // do nothing 3385 } 3386 #else // _CHECK_BOTH_FINGERS_ 3387 // we will only check the global finger 3388 3389 if (objAddr < global_finger) { 3390 // see long comment above 3391 3392 if (_cm->verbose_high()) 3393 gclog_or_tty->print_cr("[%d] below the global finger " 3394 "("PTR_FORMAT"), pushing it", 3395 _task_id, global_finger); 3396 push(obj); 3397 } 3398 #endif // _CHECK_BOTH_FINGERS_ 3399 } 3400 } 3401 } 3402 } 3403 3404 void CMTask::push(oop obj) { 3405 HeapWord* objAddr = (HeapWord*) obj; 3406 assert(_g1h->is_in_g1_reserved(objAddr), "invariant"); 3407 assert(!_g1h->is_on_master_free_list( 3408 _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant"); 3409 assert(!_g1h->is_obj_ill(obj), "invariant"); 3410 assert(_nextMarkBitMap->isMarked(objAddr), "invariant"); 3411 3412 if (_cm->verbose_high()) 3413 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj); 3414 3415 if (!_task_queue->push(obj)) { 3416 // The local task queue looks full. We need to push some entries 3417 // to the global stack. 3418 3419 if (_cm->verbose_medium()) 3420 gclog_or_tty->print_cr("[%d] task queue overflow, " 3421 "moving entries to the global stack", 3422 _task_id); 3423 move_entries_to_global_stack(); 3424 3425 // this should succeed since, even if we overflow the global 3426 // stack, we should have definitely removed some entries from the 3427 // local queue. So, there must be space on it.
3428 bool success = _task_queue->push(obj); 3429 assert(success, "invariant"); 3430 } 3431 3432 statsOnly( int tmp_size = _task_queue->size(); 3433 if (tmp_size > _local_max_size) 3434 _local_max_size = tmp_size; 3435 ++_local_pushes ); 3436 } 3437 3438 void CMTask::reached_limit() { 3439 assert(_words_scanned >= _words_scanned_limit || 3440 _refs_reached >= _refs_reached_limit, 3441 "shouldn't have been called otherwise"); 3442 regular_clock_call(); 3443 } 3444 3445 void CMTask::regular_clock_call() { 3446 if (has_aborted()) 3447 return; 3448 3449 // First, we need to recalculate the words scanned and refs reached 3450 // limits for the next clock call. 3451 recalculate_limits(); 3452 3453 // During the regular clock call we do the following: 3454 3455 // (1) If an overflow has been flagged, then we abort. 3456 if (_cm->has_overflown()) { 3457 set_has_aborted(); 3458 return; 3459 } 3460 3461 // If we are not concurrent (i.e. we're doing remark) we don't need 3462 // to check anything else. The other steps are only needed during 3463 // the concurrent marking phase. 3464 if (!concurrent()) 3465 return; 3466 3467 // (2) If marking has been aborted for Full GC, then we also abort. 3468 if (_cm->has_aborted()) { 3469 set_has_aborted(); 3470 statsOnly( ++_aborted_cm_aborted ); 3471 return; 3472 } 3473 3474 double curr_time_ms = os::elapsedVTime() * 1000.0; 3475 3476 // (3) If marking stats are enabled, then we update the step history. 3477 #if _MARKING_STATS_ 3478 if (_words_scanned >= _words_scanned_limit) 3479 ++_clock_due_to_scanning; 3480 if (_refs_reached >= _refs_reached_limit) 3481 ++_clock_due_to_marking; 3482 3483 double last_interval_ms = curr_time_ms - _interval_start_time_ms; 3484 _interval_start_time_ms = curr_time_ms; 3485 _all_clock_intervals_ms.add(last_interval_ms); 3486 3487 if (_cm->verbose_medium()) { 3488 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " 3489 "scanned = %d%s, refs reached = %d%s", 3490 _task_id, last_interval_ms, 3491 _words_scanned, 3492 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 3493 _refs_reached, 3494 (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); 3495 } 3496 #endif // _MARKING_STATS_ 3497 3498 // (4) We check whether we should yield. If we have to, then we abort. 3499 if (_cm->should_yield()) { 3500 // We should yield. To do this we abort the task. The caller is 3501 // responsible for yielding. 3502 set_has_aborted(); 3503 statsOnly( ++_aborted_yield ); 3504 return; 3505 } 3506 3507 // (5) We check whether we've reached our time quota. If we have, 3508 // then we abort. 3509 double elapsed_time_ms = curr_time_ms - _start_time_ms; 3510 if (elapsed_time_ms > _time_target_ms) { 3511 set_has_aborted(); 3512 _has_timed_out = true; 3513 statsOnly( ++_aborted_timed_out ); 3514 return; 3515 } 3516 3517 // (6) Finally, we check whether there are enough completed SATB 3518 // buffers available for processing. If there are, we abort.
3519 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3520 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3521 if (_cm->verbose_low()) 3522 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", 3523 _task_id); 3524 // we do need to process SATB buffers, so we'll abort and restart 3525 // the marking task to do so 3526 set_has_aborted(); 3527 statsOnly( ++_aborted_satb ); 3528 return; 3529 } 3530 } 3531 3532 void CMTask::recalculate_limits() { 3533 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3534 _words_scanned_limit = _real_words_scanned_limit; 3535 3536 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3537 _refs_reached_limit = _real_refs_reached_limit; 3538 } 3539 3540 void CMTask::decrease_limits() { 3541 // This is called when we believe that we're going to do an infrequent 3542 // operation which will increase the per-byte scanned cost (i.e. move 3543 // entries to/from the global stack). It basically tries to decrease the 3544 // scanning limit so that the clock is called earlier. 3545 3546 if (_cm->verbose_medium()) 3547 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); 3548 3549 _words_scanned_limit = _real_words_scanned_limit - 3550 3 * words_scanned_period / 4; 3551 _refs_reached_limit = _real_refs_reached_limit - 3552 3 * refs_reached_period / 4; 3553 } 3554 3555 void CMTask::move_entries_to_global_stack() { 3556 // local array where we'll store the entries that will be popped 3557 // from the local queue 3558 oop buffer[global_stack_transfer_size]; 3559 3560 int n = 0; 3561 oop obj; 3562 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3563 buffer[n] = obj; 3564 ++n; 3565 } 3566 3567 if (n > 0) { 3568 // we popped at least one entry from the local queue 3569 3570 statsOnly( ++_global_transfers_to; _local_pops += n ); 3571 3572 if (!_cm->mark_stack_push(buffer, n)) { 3573 if (_cm->verbose_low()) 3574 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id); 3575 set_has_aborted(); 3576 } else { 3577 // the transfer was successful 3578 3579 if (_cm->verbose_medium()) 3580 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", 3581 _task_id, n); 3582 statsOnly( int tmp_size = _cm->mark_stack_size(); 3583 if (tmp_size > _global_max_size) 3584 _global_max_size = tmp_size; 3585 _global_pushes += n ); 3586 } 3587 } 3588 3589 // this operation was quite expensive, so decrease the limits 3590 decrease_limits(); 3591 } 3592 3593 void CMTask::get_entries_from_global_stack() { 3594 // local array where we'll store the entries that will be popped 3595 // from the global stack. 3596 oop buffer[global_stack_transfer_size]; 3597 int n; 3598 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3599 assert(n <= global_stack_transfer_size, 3600 "we should not pop more than the given limit"); 3601 if (n > 0) { 3602 // yes, we did actually pop at least one entry 3603 3604 statsOnly( ++_global_transfers_from; _global_pops += n ); 3605 if (_cm->verbose_medium()) 3606 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", 3607 _task_id, n); 3608 for (int i = 0; i < n; ++i) { 3609 bool success = _task_queue->push(buffer[i]); 3610 // We only call this when the local queue is empty or under a 3611 // given target limit. So, we do not expect this push to fail.
3612 assert(success, "invariant"); 3613 } 3614 3615 statsOnly( int tmp_size = _task_queue->size(); 3616 if (tmp_size > _local_max_size) 3617 _local_max_size = tmp_size; 3618 _local_pushes += n ); 3619 } 3620 3621 // this operation was quite expensive, so decrease the limits 3622 decrease_limits(); 3623 } 3624 3625 void CMTask::drain_local_queue(bool partially) { 3626 if (has_aborted()) 3627 return; 3628 3629 // Decide what the target size is, depending on whether we're going to 3630 // drain it partially (so that other tasks can steal if they run out 3631 // of things to do) or totally (at the very end). 3632 size_t target_size; 3633 if (partially) 3634 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3635 else 3636 target_size = 0; 3637 3638 if (_task_queue->size() > target_size) { 3639 if (_cm->verbose_high()) 3640 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", 3641 _task_id, target_size); 3642 3643 oop obj; 3644 bool ret = _task_queue->pop_local(obj); 3645 while (ret) { 3646 statsOnly( ++_local_pops ); 3647 3648 if (_cm->verbose_high()) 3649 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, 3650 (void*) obj); 3651 3652 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant"); 3653 assert(!_g1h->is_on_master_free_list( 3654 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3655 3656 scan_object(obj); 3657 3658 if (_task_queue->size() <= target_size || has_aborted()) 3659 ret = false; 3660 else 3661 ret = _task_queue->pop_local(obj); 3662 } 3663 3664 if (_cm->verbose_high()) 3665 gclog_or_tty->print_cr("[%d] drained local queue, size = %d", 3666 _task_id, _task_queue->size()); 3667 } 3668 } 3669 3670 void CMTask::drain_global_stack(bool partially) { 3671 if (has_aborted()) 3672 return; 3673 3674 // We have a policy to drain the local queue before we attempt to 3675 // drain the global stack. 3676 assert(partially || _task_queue->size() == 0, "invariant"); 3677 3678 // Decide what the target size is, depending on whether we're going to 3679 // drain it partially (so that other tasks can steal if they run out 3680 // of things to do) or totally (at the very end). Notice that, 3681 // because we move entries from the global stack in chunks or 3682 // because another task might be doing the same, we might in fact 3683 // drop below the target. But, this is not a problem. 3684 size_t target_size; 3685 if (partially) 3686 target_size = _cm->partial_mark_stack_size_target(); 3687 else 3688 target_size = 0; 3689 3690 if (_cm->mark_stack_size() > target_size) { 3691 if (_cm->verbose_low()) 3692 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", 3693 _task_id, target_size); 3694 3695 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3696 get_entries_from_global_stack(); 3697 drain_local_queue(partially); 3698 } 3699 3700 if (_cm->verbose_low()) 3701 gclog_or_tty->print_cr("[%d] drained global stack, size = %d", 3702 _task_id, _cm->mark_stack_size()); 3703 } 3704 } 3705 3706 // SATB Queue has several assumptions on whether to call the par or 3707 // non-par versions of the methods. This is why some of the code is 3708 // replicated. We should really get rid of the single-threaded version 3709 // of the code to simplify things.
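// A minimal sketch of the unification suggested above (not part of the
// original code; the helper names are hypothetical): routing both cases
// through two small helpers would leave drain_satb_buffers() below with a
// single set-up call and a single processing loop.
#if 0 // illustrative sketch, deliberately not compiled
static void satb_set_closure(SATBMarkQueueSet& satb_mq_set,
                             int task_id, ObjectClosure* cl) {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(task_id, cl);
  } else {
    satb_mq_set.set_closure(cl);
  }
}

static bool satb_apply_closure_to_completed_buffer(SATBMarkQueueSet& satb_mq_set,
                                                   int task_id) {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    return satb_mq_set.par_apply_closure_to_completed_buffer(task_id);
  } else {
    return satb_mq_set.apply_closure_to_completed_buffer();
  }
}
#endif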
3710 void CMTask::drain_satb_buffers() { 3711 if (has_aborted()) 3712 return; 3713 3714 // We set this so that the regular clock knows that we're in the 3715 // middle of draining buffers and doesn't set the abort flag when it 3716 // notices that SATB buffers are available for draining. It'd be 3717 // very counterproductive if it did that. :-) 3718 _draining_satb_buffers = true; 3719 3720 CMObjectClosure oc(this); 3721 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3722 if (G1CollectedHeap::use_parallel_gc_threads()) 3723 satb_mq_set.set_par_closure(_task_id, &oc); 3724 else 3725 satb_mq_set.set_closure(&oc); 3726 3727 // This keeps claiming and applying the closure to completed buffers 3728 // until we run out of buffers or we need to abort. 3729 if (G1CollectedHeap::use_parallel_gc_threads()) { 3730 while (!has_aborted() && 3731 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 3732 if (_cm->verbose_medium()) 3733 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3734 statsOnly( ++_satb_buffers_processed ); 3735 regular_clock_call(); 3736 } 3737 } else { 3738 while (!has_aborted() && 3739 satb_mq_set.apply_closure_to_completed_buffer()) { 3740 if (_cm->verbose_medium()) 3741 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3742 statsOnly( ++_satb_buffers_processed ); 3743 regular_clock_call(); 3744 } 3745 } 3746 3747 if (!concurrent() && !has_aborted()) { 3748 // We should only do this during remark. 3749 if (G1CollectedHeap::use_parallel_gc_threads()) 3750 satb_mq_set.par_iterate_closure_all_threads(_task_id); 3751 else 3752 satb_mq_set.iterate_closure_all_threads(); 3753 } 3754 3755 _draining_satb_buffers = false; 3756 3757 assert(has_aborted() || 3758 concurrent() || 3759 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3760 3761 if (G1CollectedHeap::use_parallel_gc_threads()) 3762 satb_mq_set.set_par_closure(_task_id, NULL); 3763 else 3764 satb_mq_set.set_closure(NULL); 3765 3766 // again, this was a potentially expensive operation, so decrease the 3767 // limits to get the regular clock call early 3768 decrease_limits(); 3769 } 3770 3771 void CMTask::drain_region_stack(BitMapClosure* bc) { 3772 if (has_aborted()) 3773 return; 3774 3775 assert(_region_finger == NULL, 3776 "it should be NULL when we're not scanning a region"); 3777 3778 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) { 3779 if (_cm->verbose_low()) 3780 gclog_or_tty->print_cr("[%d] draining region stack, size = %d", 3781 _task_id, _cm->region_stack_size()); 3782 3783 MemRegion mr; 3784 3785 if (!_aborted_region.is_empty()) { 3786 mr = _aborted_region; 3787 _aborted_region = MemRegion(); 3788 3789 if (_cm->verbose_low()) 3790 gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )", 3791 _task_id, mr.start(), mr.end()); 3792 } else { 3793 mr = _cm->region_stack_pop_lock_free(); 3794 // it returns MemRegion() if the pop fails 3795 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 3796 } 3797 3798 while (mr.start() != NULL) { 3799 if (_cm->verbose_medium()) 3800 gclog_or_tty->print_cr("[%d] we are scanning region " 3801 "["PTR_FORMAT", "PTR_FORMAT")", 3802 _task_id, mr.start(), mr.end()); 3803 3804 assert(mr.end() <= _cm->finger(), 3805 "otherwise the region shouldn't be on the stack"); 3806 assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); 3807 if (_nextMarkBitMap->iterate(bc, mr)) { 3808 assert(!has_aborted(), 3809 "cannot abort the task without aborting the bitmap
iteration"); 3810 3811 // We finished iterating over the region without aborting. 3812 regular_clock_call(); 3813 if (has_aborted()) 3814 mr = MemRegion(); 3815 else { 3816 mr = _cm->region_stack_pop_lock_free(); 3817 // it returns MemRegion() if the pop fails 3818 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 3819 } 3820 } else { 3821 assert(has_aborted(), "currently the only way to do so"); 3822 3823 // The only way to abort the bitmap iteration is to return 3824 // false from the do_bit() method. However, inside the 3825 // do_bit() method we move the _region_finger to point to the 3826 // object currently being looked at. So, if we bail out, we 3827 // have definitely set _region_finger to something non-null. 3828 assert(_region_finger != NULL, "invariant"); 3829 3830 // Make sure that any previously aborted region has been 3831 // cleared. 3832 assert(_aborted_region.is_empty(), "aborted region not cleared"); 3833 3834 // The iteration was actually aborted. So now _region_finger 3835 // points to the address of the object we last scanned. If we 3836 // leave it there, when we restart this task, we will rescan 3837 // the object. It is easy to avoid this. We move the finger by 3838 // enough to point to the next possible object header (the 3839 // bitmap knows by how much we need to move it as it knows its 3840 // granularity). 3841 MemRegion newRegion = 3842 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end()); 3843 3844 if (!newRegion.is_empty()) { 3845 if (_cm->verbose_low()) { 3846 gclog_or_tty->print_cr("[%d] recording unscanned region" 3847 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask", 3848 _task_id, 3849 newRegion.start(), newRegion.end()); 3850 } 3851 // Now record the part of the region we didn't scan to 3852 // make sure this task scans it later. 
3853 _aborted_region = newRegion; 3854 } 3855 // break from while 3856 mr = MemRegion(); 3857 } 3858 _region_finger = NULL; 3859 } 3860 3861 if (_cm->verbose_low()) 3862 gclog_or_tty->print_cr("[%d] drained region stack, size = %d", 3863 _task_id, _cm->region_stack_size()); 3864 } 3865 } 3866 3867 void CMTask::print_stats() { 3868 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 3869 _task_id, _calls); 3870 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3871 _elapsed_time_ms, _termination_time_ms); 3872 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3873 _step_times_ms.num(), _step_times_ms.avg(), 3874 _step_times_ms.sd()); 3875 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3876 _step_times_ms.maximum(), _step_times_ms.sum()); 3877 3878 #if _MARKING_STATS_ 3879 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3880 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3881 _all_clock_intervals_ms.sd()); 3882 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3883 _all_clock_intervals_ms.maximum(), 3884 _all_clock_intervals_ms.sum()); 3885 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3886 _clock_due_to_scanning, _clock_due_to_marking); 3887 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3888 _objs_scanned, _objs_found_on_bitmap); 3889 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3890 _local_pushes, _local_pops, _local_max_size); 3891 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3892 _global_pushes, _global_pops, _global_max_size); 3893 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3894 _global_transfers_to, _global_transfers_from); 3895 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d", 3896 _regions_claimed, _region_stack_pops); 3897 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3898 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3899 _steal_attempts, _steals); 3900 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3901 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3902 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3903 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3904 _aborted_timed_out, _aborted_satb, _aborted_termination); 3905 #endif // _MARKING_STATS_ 3906 } 3907 3908 /***************************************************************************** 3909 3910 The do_marking_step(time_target_ms) method is the building block 3911 of the parallel marking framework. It can be called in parallel 3912 with other invocations of do_marking_step() on different tasks 3913 (but only one per task, obviously) and concurrently with the 3914 mutator threads, or during remark, hence it eliminates the need 3915 for two versions of the code. When called during remark, it will 3916 pick up from where the task left off during the concurrent marking 3917 phase. Interestingly, tasks are claimable during evacuation 3918 pauses too, since do_marking_step() ensures that it aborts before 3919 it needs to yield. 3920 3921 The data structures that it uses to do marking work are the 3922 following: 3923 3924 (1) Marking Bitmap.
If there are gray objects that appear only 3925 on the bitmap (this happens either when dealing with an overflow 3926 or when the initial marking phase has simply marked the roots 3927 and didn't push them on the stack), then tasks claim heap 3928 regions whose bitmap they then scan to find gray objects. A 3929 global finger indicates where the end of the last claimed region 3930 is. A local finger indicates how far into the region a task has 3931 scanned. The two fingers are used to determine how to gray an 3932 object (i.e. whether simply marking it is OK, as it will be 3933 visited by a task in the future, or whether it also needs to be 3934 pushed on a stack). 3935 3936 (2) Local Queue. Each task has a local queue, which it can 3937 access reasonably efficiently. Other tasks can steal from 3938 it when they run out of work. Throughout the marking phase, a 3939 task attempts to keep its local queue short but not totally 3940 empty, so that entries are available for stealing by other 3941 tasks. Only when there is no more work will a task totally 3942 drain its local queue. 3943 3944 (3) Global Mark Stack. This handles local queue overflow. During 3945 marking only sets of entries are moved between it and the local 3946 queues, as access to it requires a mutex and more fine-grained 3947 interaction with it might cause contention. If it 3948 overflows, then the marking phase should restart and iterate 3949 over the bitmap to identify gray objects. Throughout the marking 3950 phase, tasks attempt to keep the global mark stack at a small 3951 length but not totally empty, so that entries are available for 3952 popping by other tasks. Only when there is no more work will 3953 tasks totally drain the global mark stack. 3954 3955 (4) Global Region Stack. Entries on it correspond to areas of 3956 the bitmap that need to be scanned since they contain gray 3957 objects. Pushes on the region stack only happen during 3958 evacuation pauses and typically correspond to areas covered by 3959 GC LABs. If it overflows, then the marking phase should restart 3960 and iterate over the bitmap to identify gray objects. Tasks will 3961 try to totally drain the region stack as soon as possible. 3962 3963 (5) SATB Buffer Queue. This is where completed SATB buffers are 3964 made available. Buffers are regularly removed from this queue 3965 and scanned for roots, so that the queue doesn't get too 3966 long. During remark, all completed buffers are processed, as 3967 well as the filled-in parts of any uncompleted buffers. 3968 3969 The do_marking_step() method tries to abort when the time target 3970 has been reached. There are a few other cases when the 3971 do_marking_step() method also aborts: 3972 3973 (1) When the marking phase has been aborted (after a Full GC). 3974 3975 (2) When a global overflow (either on the global stack or the 3976 region stack) has been triggered. Before the task aborts, it 3977 will actually sync up with the other tasks to ensure that all 3978 the marking data structures (local queues, stacks, fingers etc.) 3979 are re-initialised so that when do_marking_step() completes, 3980 the marking phase can immediately restart. 3981 3982 (3) When enough completed SATB buffers are available. The 3983 do_marking_step() method only tries to drain SATB buffers right 3984 at the beginning. So, if enough buffers are available, the 3985 marking step aborts and the SATB buffers are processed at 3986 the beginning of the next invocation. 3987 3988 (4) To yield.
When we have to yield, we abort and do the yield 3989 right at the end of do_marking_step(). This saves us a lot 3990 of hassle as, by yielding, we might allow a Full GC. If this 3991 happens then objects will be compacted underneath our feet, the 3992 heap might shrink, etc. We save checking for this by just 3993 aborting and doing the yield right at the end. 3994 3995 From the above it follows that the do_marking_step() method should 3996 be called in a loop (or, otherwise, regularly) until it completes. 3997 3998 If a marking step completes without its has_aborted() flag being 3999 true, it means it has completed the current marking phase (and 4000 also all other marking tasks have done so and have all synced up). 4001 4002 A method called regular_clock_call() is invoked "regularly" (in 4003 sub-ms intervals) throughout marking. It is this clock method that 4004 checks all the abort conditions which were mentioned above and 4005 decides when the task should abort. A work-based scheme is used to 4006 trigger this clock method: when the number of object words the 4007 marking phase has scanned or the number of references the marking 4008 phase has visited reaches a given limit. Additional invocations of 4009 the clock method have been planted in a few other strategic places 4010 too. The initial reason for the clock method was to avoid calling 4011 vtime too regularly, as it is quite expensive. So, once it was in 4012 place, it was natural to piggy-back all the other conditions on it 4013 too and not constantly check them throughout the code. 4014 4015 *****************************************************************************/ 4016 4017 void CMTask::do_marking_step(double time_target_ms, 4018 bool do_stealing, 4019 bool do_termination) { 4020 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 4021 assert(concurrent() == _cm->concurrent(), "they should be the same"); 4022 4023 assert(concurrent() || _cm->region_stack_empty(), 4024 "the region stack should have been cleared before remark"); 4025 assert(concurrent() || !_cm->has_aborted_regions(), 4026 "aborted regions should have been cleared before remark"); 4027 assert(_region_finger == NULL, 4028 "this should be non-null only when a region is being scanned"); 4029 4030 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 4031 assert(_task_queues != NULL, "invariant"); 4032 assert(_task_queue != NULL, "invariant"); 4033 assert(_task_queues->queue(_task_id) == _task_queue, "invariant"); 4034 4035 assert(!_claimed, 4036 "only one thread should claim this task at any one time"); 4037 4038 // OK, this doesn't safeguard against all possible scenarios, as it is 4039 // possible for two threads to set the _claimed flag at the same 4040 // time. But it is only for debugging purposes anyway and it will 4041 // catch most problems.
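// (A watertight check would claim the flag atomically, e.g. with something
// like Atomic::cmpxchg on a jint flag -- sketched here purely as an
// illustration, _claimed_flag being a hypothetical field:
//
//   if (Atomic::cmpxchg(1, &_claimed_flag, 0) != 0) {
//     return; // some other thread claimed the task first
//   }
//
// but a plain store is considered good enough for a debug-only aid.)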
4042 _claimed = true; 4043 4044 _start_time_ms = os::elapsedVTime() * 1000.0; 4045 statsOnly( _interval_start_time_ms = _start_time_ms ); 4046 4047 double diff_prediction_ms = 4048 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 4049 _time_target_ms = time_target_ms - diff_prediction_ms; 4050 4051 // set up the variables that are used in the work-based scheme to 4052 // call the regular clock method 4053 _words_scanned = 0; 4054 _refs_reached = 0; 4055 recalculate_limits(); 4056 4057 // clear all flags 4058 clear_has_aborted(); 4059 _has_timed_out = false; 4060 _draining_satb_buffers = false; 4061 4062 ++_calls; 4063 4064 if (_cm->verbose_low()) 4065 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " 4066 "target = %1.2lfms >>>>>>>>>>", 4067 _task_id, _calls, _time_target_ms); 4068 4069 // Set up the bitmap and oop closures. Anything that uses them is 4070 // eventually called from this method, so it is OK to allocate these 4071 // statically. 4072 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 4073 CMOopClosure oop_closure(_g1h, _cm, this); 4074 set_oop_closure(&oop_closure); 4075 4076 if (_cm->has_overflown()) { 4077 // This can happen if the region stack or the mark stack overflows 4078 // during a GC pause and this task, after a yield point, 4079 // restarts. We have to abort as we need to get into the overflow 4080 // protocol which happens right at the end of this task. 4081 set_has_aborted(); 4082 } 4083 4084 // First drain any available SATB buffers. After this, we will not 4085 // look at SATB buffers before the next invocation of this method. 4086 // If enough completed SATB buffers are queued up, the regular clock 4087 // will abort this task so that it restarts. 4088 drain_satb_buffers(); 4089 // ...then partially drain the local queue and the global stack 4090 drain_local_queue(true); 4091 drain_global_stack(true); 4092 4093 // Then totally drain the region stack. We will not look at 4094 // it again before the next invocation of this method. Entries on 4095 // the region stack are only added during evacuation pauses, for 4096 // which we have to yield. When we do, we abort the task anyway so 4097 // it will look at the region stack again when it restarts. 4098 bitmap_closure.set_scanning_heap_region(false); 4099 drain_region_stack(&bitmap_closure); 4100 // ...then partially drain the local queue and the global stack 4101 drain_local_queue(true); 4102 drain_global_stack(true); 4103 4104 do { 4105 if (!has_aborted() && _curr_region != NULL) { 4106 // This means that we're already holding on to a region. 4107 assert(_finger != NULL, "if region is not NULL, then the finger " 4108 "should not be NULL either"); 4109 4110 // We might have restarted this task after an evacuation pause 4111 // which might have evacuated the region we're holding on to 4112 // underneath our feet. Let's read its limit again to make sure 4113 // that we do not iterate over a region of the heap that 4114 // contains garbage (update_region_limit() will also move 4115 // _finger to the start of the region if it is found empty). 4116 update_region_limit(); 4117 // We will start from _finger not from the start of the region, 4118 // as we might be restarting this task after aborting half-way 4119 // through scanning this region. In this case, _finger points to 4120 // the address where we last found a marked object. If this is a 4121 // fresh region, _finger points to start(). 
4122 MemRegion mr = MemRegion(_finger, _region_limit); 4123 4124 if (_cm->verbose_low()) 4125 gclog_or_tty->print_cr("[%d] we're scanning part " 4126 "["PTR_FORMAT", "PTR_FORMAT") " 4127 "of region "PTR_FORMAT, 4128 _task_id, _finger, _region_limit, _curr_region); 4129 4130 // Let's iterate over the bitmap of the part of the 4131 // region that is left. 4132 bitmap_closure.set_scanning_heap_region(true); 4133 if (mr.is_empty() || 4134 _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4135 // We successfully completed iterating over the region. Now, 4136 // let's give up the region. 4137 giveup_current_region(); 4138 regular_clock_call(); 4139 } else { 4140 assert(has_aborted(), "currently the only way to do so"); 4141 // The only way to abort the bitmap iteration is to return 4142 // false from the do_bit() method. However, inside the 4143 // do_bit() method we move the _finger to point to the 4144 // object currently being looked at. So, if we bail out, we 4145 // have definitely set _finger to something non-null. 4146 assert(_finger != NULL, "invariant"); 4147 4148 // Region iteration was actually aborted. So now _finger 4149 // points to the address of the object we last scanned. If we 4150 // leave it there, when we restart this task, we will rescan 4151 // the object. It is easy to avoid this. We move the finger by 4152 // enough to point to the next possible object header (the 4153 // bitmap knows by how much we need to move it as it knows its 4154 // granularity). 4155 assert(_finger < _region_limit, "invariant"); 4156 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 4157 // Check if bitmap iteration was aborted while scanning the last object 4158 if (new_finger >= _region_limit) { 4159 giveup_current_region(); 4160 } else { 4161 move_finger_to(new_finger); 4162 } 4163 } 4164 } 4165 // At this point we have either completed iterating over the 4166 // region we were holding on to, or we have aborted. 4167 4168 // We then partially drain the local queue and the global stack. 4169 // (Do we really need this?) 4170 drain_local_queue(true); 4171 drain_global_stack(true); 4172 4173 // Read the note on the claim_region() method on why it might 4174 // return NULL with potentially more regions available for 4175 // claiming and why we have to check out_of_regions() to determine 4176 // whether we're done or not. 4177 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4178 // We are going to try to claim a new region. We should have 4179 // given up on the previous one. 4180 // Separated the asserts so that we know which one fires. 4181 assert(_curr_region == NULL, "invariant"); 4182 assert(_finger == NULL, "invariant"); 4183 assert(_region_limit == NULL, "invariant"); 4184 if (_cm->verbose_low()) 4185 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 4186 HeapRegion* claimed_region = _cm->claim_region(_task_id); 4187 if (claimed_region != NULL) { 4188 // Yes, we managed to claim one 4189 statsOnly( ++_regions_claimed ); 4190 4191 if (_cm->verbose_low()) 4192 gclog_or_tty->print_cr("[%d] we successfully claimed " 4193 "region "PTR_FORMAT, 4194 _task_id, claimed_region); 4195 4196 setup_for_region(claimed_region); 4197 assert(_curr_region == claimed_region, "invariant"); 4198 } 4199 // It is important to call the regular clock here. It might take 4200 // a while to claim a region if, for example, we hit a large 4201 // block of empty regions. 
So we need to call the regular clock 4202 // method once round the loop to make sure it's called 4203 // frequently enough. 4204 regular_clock_call(); 4205 } 4206 4207 if (!has_aborted() && _curr_region == NULL) { 4208 assert(_cm->out_of_regions(), 4209 "at this point we should be out of regions"); 4210 } 4211 } while (_curr_region != NULL && !has_aborted()); 4212 4213 if (!has_aborted()) { 4214 // We cannot check whether the global stack is empty, since other 4215 // tasks might be pushing objects to it concurrently. We also cannot 4216 // check if the region stack is empty because if a thread is aborting 4217 // it can push a partially done region back. 4218 assert(_cm->out_of_regions(), 4219 "at this point we should be out of regions"); 4220 4221 if (_cm->verbose_low()) 4222 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); 4223 4224 // Try to reduce the number of available SATB buffers so that 4225 // remark has less work to do. 4226 drain_satb_buffers(); 4227 } 4228 4229 // Since we've done everything else, we can now totally drain the 4230 // local queue and global stack. 4231 drain_local_queue(false); 4232 drain_global_stack(false); 4233 4234 // Attempt at work stealing from other tasks' queues. 4235 if (do_stealing && !has_aborted()) { 4236 // We have not aborted. This means that we have finished all that 4237 // we could. Let's try to do some stealing... 4238 4239 // We cannot check whether the global stack is empty, since other 4240 // tasks might be pushing objects to it concurrently. We also cannot 4241 // check if the region stack is empty because if a thread is aborting 4242 // it can push a partially done region back. 4243 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4244 "only way to reach here"); 4245 4246 if (_cm->verbose_low()) 4247 gclog_or_tty->print_cr("[%d] starting to steal", _task_id); 4248 4249 while (!has_aborted()) { 4250 oop obj; 4251 statsOnly( ++_steal_attempts ); 4252 4253 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { 4254 if (_cm->verbose_medium()) 4255 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", 4256 _task_id, (void*) obj); 4257 4258 statsOnly( ++_steals ); 4259 4260 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4261 "any stolen object should be marked"); 4262 scan_object(obj); 4263 4264 // And since we're towards the end, let's totally drain the 4265 // local queue and global stack. 4266 drain_local_queue(false); 4267 drain_global_stack(false); 4268 } else { 4269 break; 4270 } 4271 } 4272 } 4273 4274 // We still haven't aborted. Now, let's try to get into the 4275 // termination protocol. 4276 if (do_termination && !has_aborted()) { 4277 // We cannot check whether the global stack is empty, since other 4278 // tasks might be concurrently pushing objects on it. We also cannot 4279 // check if the region stack is empty because if a thread is aborting 4280 // it can push a partially done region back. 4281 // Separated the asserts so that we know which one fires. 4282 assert(_cm->out_of_regions(), "only way to reach here"); 4283 assert(_task_queue->size() == 0, "only way to reach here"); 4284 4285 if (_cm->verbose_low()) 4286 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); 4287 4288 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4289 // The CMTask class also extends the TerminatorTerminator class, 4290 // hence its should_exit_termination() method will also decide 4291 // whether to exit the termination protocol or not.
4292 bool finished = _cm->terminator()->offer_termination(this); 4293 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4294 _termination_time_ms += 4295 termination_end_time_ms - _termination_start_time_ms; 4296 4297 if (finished) { 4298 // We're all done. 4299 4300 if (_task_id == 0) { 4301 // let's allow task 0 to do this 4302 if (concurrent()) { 4303 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4304 // we need to set this to false before the next 4305 // safepoint. This way we ensure that the marking phase 4306 // doesn't observe any more heap expansions. 4307 _cm->clear_concurrent_marking_in_progress(); 4308 } 4309 } 4310 4311 // We can now guarantee that the global stack is empty, since 4312 // all other tasks have finished. We separated the guarantees so 4313 // that, if a condition is false, we can immediately find out 4314 // which one. 4315 guarantee(_cm->out_of_regions(), "only way to reach here"); 4316 guarantee(_aborted_region.is_empty(), "only way to reach here"); 4317 guarantee(_cm->region_stack_empty(), "only way to reach here"); 4318 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4319 guarantee(_task_queue->size() == 0, "only way to reach here"); 4320 guarantee(!_cm->has_overflown(), "only way to reach here"); 4321 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4322 guarantee(!_cm->region_stack_overflow(), "only way to reach here"); 4323 4324 if (_cm->verbose_low()) 4325 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); 4326 } else { 4327 // Apparently there's more work to do. Let's abort this task. It 4328 // will restart it and we can hopefully find more things to do. 4329 4330 if (_cm->verbose_low()) 4331 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id); 4332 4333 set_has_aborted(); 4334 statsOnly( ++_aborted_termination ); 4335 } 4336 } 4337 4338 // Mainly for debugging purposes to make sure that a pointer to the 4339 // closure which was statically allocated in this frame doesn't 4340 // escape it by accident. 4341 set_oop_closure(NULL); 4342 double end_time_ms = os::elapsedVTime() * 1000.0; 4343 double elapsed_time_ms = end_time_ms - _start_time_ms; 4344 // Update the step history. 4345 _step_times_ms.add(elapsed_time_ms); 4346 4347 if (has_aborted()) { 4348 // The task was aborted for some reason. 4349 4350 statsOnly( ++_aborted ); 4351 4352 if (_has_timed_out) { 4353 double diff_ms = elapsed_time_ms - _time_target_ms; 4354 // Keep statistics of how well we did with respect to hitting 4355 // our target only if we actually timed out (if we aborted for 4356 // other reasons, then the results might get skewed). 4357 _marking_step_diffs_ms.add(diff_ms); 4358 } 4359 4360 if (_cm->has_overflown()) { 4361 // This is the interesting one. We aborted because a global 4362 // overflow was raised. This means we have to restart the 4363 // marking phase and start iterating over regions. However, in 4364 // order to do this we have to make sure that all tasks stop 4365 // what they are doing and re-initialise in a safe manner. We 4366 // will achieve this with the use of two barrier sync points. 4367 4368 if (_cm->verbose_low()) 4369 gclog_or_tty->print_cr("[%d] detected overflow", _task_id); 4370 4371 _cm->enter_first_sync_barrier(_task_id); 4372 // When we exit this sync barrier we know that all tasks have 4373 // stopped doing marking work. So, it's now safe to 4374 // re-initialise our data structures. 
At the end of this method, 4375 // task 0 will clear the global data structures. 4376 4377 statsOnly( ++_aborted_overflow ); 4378 4379 // We clear the local state of this task... 4380 clear_region_fields(); 4381 4382 // ...and enter the second barrier. 4383 _cm->enter_second_sync_barrier(_task_id); 4384 // At this point everything has been re-initialised and we're 4385 // ready to restart. 4386 } 4387 4388 if (_cm->verbose_low()) { 4389 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4390 "elapsed = %1.2lfms <<<<<<<<<<", 4391 _task_id, _time_target_ms, elapsed_time_ms); 4392 if (_cm->has_aborted()) 4393 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", 4394 _task_id); 4395 } 4396 } else { 4397 if (_cm->verbose_low()) 4398 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4399 "elapsed = %1.2lfms <<<<<<<<<<", 4400 _task_id, _time_target_ms, elapsed_time_ms); 4401 } 4402 4403 _claimed = false; 4404 } 4405 4406 CMTask::CMTask(int task_id, 4407 ConcurrentMark* cm, 4408 CMTaskQueue* task_queue, 4409 CMTaskQueueSet* task_queues) 4410 : _g1h(G1CollectedHeap::heap()), 4411 _task_id(task_id), _cm(cm), 4412 _claimed(false), 4413 _nextMarkBitMap(NULL), _hash_seed(17), 4414 _task_queue(task_queue), 4415 _task_queues(task_queues), 4416 _oop_closure(NULL), 4417 _aborted_region(MemRegion()) { 4418 guarantee(task_queue != NULL, "invariant"); 4419 guarantee(task_queues != NULL, "invariant"); 4420 4421 statsOnly( _clock_due_to_scanning = 0; 4422 _clock_due_to_marking = 0 ); 4423 4424 _marking_step_diffs_ms.add(0.5); 4425 }
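// As the block comment above do_marking_step() explains, the method is meant
// to be invoked repeatedly until it completes without its has_aborted() flag
// set. The driver below is an illustrative sketch only (the real callers
// elsewhere in this file are more involved, and the 10.0ms target is an
// arbitrary example value), deliberately kept out of the build:
#if 0 // illustrative sketch, deliberately not compiled
static void example_marking_loop(CMTask* task) {
  do {
    task->do_marking_step(10.0  /* time_target_ms */,
                          true  /* do_stealing    */,
                          true  /* do_termination */);
    // If the step aborted (global overflow, yield request, pending SATB
    // buffers, or time-out), the caller yields or synchronises as needed
    // and then simply calls do_marking_step() again; the task resumes where
    // it left off. A step that returns without aborting means the whole
    // marking phase is finished.
  } while (task->has_aborted());
}
#endif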