/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL, 0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
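  // A hedged worked example: the map keeps one bit per possible object
  // start, i.e. per (HeapWordSize << _shifter) bytes. On a 32-bit VM
  // with 8-byte object alignment (_shifter == 1, say), an addr of
  // 0x1004 rounds up to 0x1008, so heapWordToOffset() below lands
  // exactly on a bit boundary rather than inside a bit's span.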
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  // intersection() returns the clamped region; assign it back.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

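// A minimal usage sketch for getAndClearMarkedRegion() above
// (hypothetical caller; 'process' is a placeholder): walk and clear
// every marked range in [scan_start, scan_end):
//
//   HeapWord* cur = scan_start;
//   while (cur < scan_end) {
//     MemRegion mr = bitmap->getAndClearMarkedRegion(cur, scan_end);
//     if (mr.is_empty()) break;   // no marked words left
//     process(mr);
//     cur = mr.end();
//   }
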
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}


CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
}

void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
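    // Claim slot 'index' with the CAS below; only the winner stores
    // into _base[index]. This is safe only because, by protocol,
    // lock-free pushes are not interleaved with the lock-free pops
    // (see the comment on pop_lock_free() below).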
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
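    // Note that cl may push newly-discovered grey objects back onto
    // this stack; the loop keeps draining until the stack is empty,
    // or until the yield check just below asks us to stop.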
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
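  // The thread is only created here; the marking cycle itself is
  // driven by ConcurrentMarkThread, which calls back into
  // markFromRoots() and the checkpoint methods defined below.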
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set
      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                             (int) _parallel_marking_threads,
                                             false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, then we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions;
  // these happen at the end of evacuation pauses, when tasks are
  // inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
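  // Remark, for instance, may run with a different number of active
  // tasks than concurrent marking (see the comment in reset() about
  // different phases using different numbers of active threads), so
  // even currently-inactive tasks get a consistent _concurrent flag.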
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  // Free the arrays allocated in the constructor.
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
  FREE_C_HEAP_ARRAY(double, _accum_task_vtime);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur, next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    true /* use_prev_marking */, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
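  // reset() re-reads the committed heap bounds, clears the global and
  // per-task marking state, and raises the
  // concurrent-marking-in-progress flag (see its definition above).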
  reset();
}

class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
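// In outline, the checkpoint below records policy timings, runs
// checkpointRootsInitialPre() (reset), grays everything reachable from
// the strong roots via CMMarkRootsClosure, and finishes with
// checkpointRootsInitialPost() (SATB activation and reference
// discovery).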
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark  hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,      // activate StrongRootsScope
                            false,     // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,      // do not visit active blobs
                            &older     // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or an evacuation
 * pause could occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
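    // Hence the concurrent() argument below: true during concurrent
    // marking (clear the overflow flag), false at remark (keep it set
    // so the caller aborts the pause and restarts marking).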
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative, but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
%1.4lf ms", 1142 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); 1143 #endif 1144 } while (!_cm->has_aborted() && the_task->has_aborted()); 1145 } 1146 the_task->record_end_time(); 1147 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 1148 1149 ConcurrentGCThread::stsLeave(); 1150 1151 double end_vtime = os::elapsedVTime(); 1152 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime); 1153 } 1154 1155 CMConcurrentMarkingTask(ConcurrentMark* cm, 1156 ConcurrentMarkThread* cmt) : 1157 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 1158 1159 ~CMConcurrentMarkingTask() { } 1160 }; 1161 1162 void ConcurrentMark::markFromRoots() { 1163 // we might be tempted to assert that: 1164 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1165 // "inconsistent argument?"); 1166 // However that wouldn't be right, because it's possible that 1167 // a safepoint is indeed in progress as a younger generation 1168 // stop-the-world GC happens even as we mark in this generation. 1169 1170 _restart_for_overflow = false; 1171 1172 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads()); 1173 force_overflow_conc()->init(); 1174 set_phase(active_workers, true /* concurrent */); 1175 1176 CMConcurrentMarkingTask markingTask(this, cmThread()); 1177 if (parallel_marking_threads() > 0) 1178 _parallel_workers->run_task(&markingTask); 1179 else 1180 markingTask.work(0); 1181 print_stats(); 1182 } 1183 1184 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1185 // world is stopped at this checkpoint 1186 assert(SafepointSynchronize::is_at_safepoint(), 1187 "world should be stopped"); 1188 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1189 1190 // If a full collection has happened, we shouldn't do this. 1191 if (has_aborted()) { 1192 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1193 return; 1194 } 1195 1196 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1197 1198 if (VerifyDuringGC) { 1199 HandleMark hm; // handle scope 1200 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1201 Universe::heap()->prepare_for_verify(); 1202 Universe::verify(true, false, true); 1203 } 1204 1205 G1CollectorPolicy* g1p = g1h->g1_policy(); 1206 g1p->record_concurrent_mark_remark_start(); 1207 1208 double start = os::elapsedTime(); 1209 1210 checkpointRootsFinalWork(); 1211 1212 double mark_work_end = os::elapsedTime(); 1213 1214 weakRefsWork(clear_all_soft_refs); 1215 1216 if (has_overflown()) { 1217 // Oops. We overflowed. Restart concurrent marking. 1218 _restart_for_overflow = true; 1219 // Clear the flag. We do not need it any more. 1220 clear_has_overflown(); 1221 if (G1TraceMarkStackOverflow) 1222 gclog_or_tty->print_cr("\nRemark led to restart for overflow."); 1223 } else { 1224 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1225 // We're done with marking. 1226 // This is the end of the marking cycle, we're expected all 1227 // threads to have SATB queues with active set to true. 
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */      true,
                               /* silent */           false,
                               /* use_prev_marking */ false);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm), _final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
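      // Sketch: a humongous object spanning regions [index, index+3)
      // leaves the loop below with end_index == index + 3, and the
      // par_at_put_range() call then sets all three region bits at once.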
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded. It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set. We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
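            // E.g. (with the usual 512-byte cards): if the current run
            // is cards [10, 12] and the next live object starts on
            // card 15, the gap (15 - 12 > 1) flushes [10, 12] here and
            // the run restarts at card 15.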
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};


void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 final counting"), _g1h(g1h),
    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  {
    if (ParallelGCThreads > 0)
      _n_workers = _g1h->workers()->total_workers();
    else
      _n_workers = 1;
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

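  // Free the per-worker accumulation arrays allocated in the
  // constructor above.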
  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task);
  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(int i) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           int worker_num,
                           FreeRegionList* local_cleanup_list,
                           HumongousRegionSet* humongous_proxy_set,
                           HRRSCleanupTask* hrrs_cleanup_task)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  hr->reset_gc_time_stamp();
  if (!hr->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    hr->note_end_of_marking();
    _max_live_bytes += hr->max_live_bytes();
    _g1->free_region_if_empty(hr,
                              &_freed_bytes,
                              _local_cleanup_list,
                              _humongous_proxy_set,
                              _hrrs_cleanup_task,
                              true /* par */);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) _max_region_time = region_time;
  }
  return false;
}

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
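  // (Same early-out as in checkpointRootsFinal() above: a Full GC
  // aborts the cycle and its results can no longer be used.)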
1716 if (has_aborted()) { 1717 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1718 return; 1719 } 1720 1721 g1h->verify_region_sets_optional(); 1722 1723 if (VerifyDuringGC) { 1724 HandleMark hm; // handle scope 1725 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1726 Universe::heap()->prepare_for_verify(); 1727 Universe::verify(/* allow dirty */ true, 1728 /* silent */ false, 1729 /* prev marking */ true); 1730 } 1731 1732 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1733 g1p->record_concurrent_mark_cleanup_start(); 1734 1735 double start = os::elapsedTime(); 1736 1737 HeapRegionRemSet::reset_for_cleanup_tasks(); 1738 1739 // Do counting once more with the world stopped for good measure. 1740 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), 1741 &_region_bm, &_card_bm); 1742 if (G1CollectedHeap::use_parallel_gc_threads()) { 1743 assert(g1h->check_heap_region_claim_values( 1744 HeapRegion::InitialClaimValue), 1745 "sanity check"); 1746 1747 int n_workers = g1h->workers()->total_workers(); 1748 g1h->set_par_threads(n_workers); 1749 g1h->workers()->run_task(&g1_par_count_task); 1750 g1h->set_par_threads(0); 1751 1752 assert(g1h->check_heap_region_claim_values( 1753 HeapRegion::FinalCountClaimValue), 1754 "sanity check"); 1755 } else { 1756 g1_par_count_task.work(0); 1757 } 1758 1759 size_t known_garbage_bytes = 1760 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); 1761 #if 0 1762 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", 1763 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), 1764 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), 1765 (double) known_garbage_bytes / (double) (1024 * 1024)); 1766 #endif // 0 1767 g1p->set_known_garbage_bytes(known_garbage_bytes); 1768 1769 size_t start_used_bytes = g1h->used(); 1770 _at_least_one_mark_complete = true; 1771 g1h->set_marking_complete(); 1772 1773 double count_end = os::elapsedTime(); 1774 double this_final_counting_time = (count_end - start); 1775 if (G1PrintParCleanupStats) { 1776 gclog_or_tty->print_cr("Cleanup:"); 1777 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", 1778 this_final_counting_time*1000.0); 1779 } 1780 _total_counting_time += this_final_counting_time; 1781 1782 if (G1PrintRegionLivenessInfo) { 1783 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1784 _g1h->heap_region_iterate(&cl); 1785 } 1786 1787 // Install newly created mark bitMap as "prev". 1788 swapMarkBitMaps(); 1789 1790 g1h->reset_gc_time_stamp(); 1791 1792 // Note end of marking in all heap regions. 1793 double note_end_start = os::elapsedTime(); 1794 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 1795 if (G1CollectedHeap::use_parallel_gc_threads()) { 1796 int n_workers = g1h->workers()->total_workers(); 1797 g1h->set_par_threads(n_workers); 1798 g1h->workers()->run_task(&g1_par_note_end_task); 1799 g1h->set_par_threads(0); 1800 1801 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 1802 "sanity check"); 1803 } else { 1804 g1_par_note_end_task.work(0); 1805 } 1806 1807 if (!cleanup_list_is_empty()) { 1808 // The cleanup list is not empty, so we'll have to process it 1809 // concurrently. Notify anyone else that might be wanting free 1810 // regions that there will be more free regions coming soon. 
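  // (Essentially a flag to allocators: while it is set, the secondary
  // free list is about to be populated, so waiting on it can pay off
  // instead of forcing a full GC. It is cleared again once cleanup has
  // finished handing regions over.)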
1811 g1h->set_free_regions_coming();
1812 }
1813 double note_end_end = os::elapsedTime();
1814 if (G1PrintParCleanupStats) {
1815 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1816 (note_end_end - note_end_start)*1000.0);
1817 }
1818
1819 // Now we "scrub" remembered sets. Note that we must do this before the
1820 // call below, since it affects the metric by which we sort the heap
1821 // regions.
1822 if (G1ScrubRemSets) {
1823 double rs_scrub_start = os::elapsedTime();
1824 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1825 if (G1CollectedHeap::use_parallel_gc_threads()) {
1826 int n_workers = g1h->workers()->total_workers();
1827 g1h->set_par_threads(n_workers);
1828 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1829 g1h->set_par_threads(0);
1830
1831 assert(g1h->check_heap_region_claim_values(
1832 HeapRegion::ScrubRemSetClaimValue),
1833 "sanity check");
1834 } else {
1835 g1_par_scrub_rs_task.work(0);
1836 }
1837
1838 double rs_scrub_end = os::elapsedTime();
1839 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1840 _total_rs_scrub_time += this_rs_scrub_time;
1841 }
1842
1843 // this will also free any regions totally full of garbage objects,
1844 // and sort the regions.
1845 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1846 g1_par_note_end_task.freed_bytes(),
1847 g1_par_note_end_task.max_live_bytes());
1848
1849 // Statistics.
1850 double end = os::elapsedTime();
1851 _cleanup_times.add((end - start) * 1000.0);
1852
1853 // G1CollectedHeap::heap()->print();
1854 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1855 // G1CollectedHeap::heap()->get_gc_time_stamp());
1856
1857 if (PrintGC || PrintGCDetails) {
1858 g1h->print_size_transition(gclog_or_tty,
1859 start_used_bytes,
1860 g1h->used(),
1861 g1h->capacity());
1862 }
1863
1864 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1865 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1866
1867 // We need to make this be a "collection" so any collection pause that
1868 // races with it goes around and waits for completeCleanup to finish.
1869 g1h->increment_total_collections();
1870
1871 if (VerifyDuringGC) {
1872 HandleMark hm; // handle scope
1873 gclog_or_tty->print(" VerifyDuringGC:(after)");
1874 Universe::heap()->prepare_for_verify();
1875 Universe::verify(/* allow dirty */ true,
1876 /* silent */ false,
1877 /* prev marking */ true);
1878 }
1879
1880 g1h->verify_region_sets_optional();
1881 }
1882
1883 void ConcurrentMark::completeCleanup() {
1884 if (has_aborted()) return;
1885
1886 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1887
1888 _cleanup_list.verify_optional();
1889 FreeRegionList tmp_free_list("Tmp Free List");
1890
1891 if (G1ConcRegionFreeingVerbose) {
1892 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1893 "cleanup list has "SIZE_FORMAT" entries",
1894 _cleanup_list.length());
1895 }
1896
1897 // No one else should be accessing the _cleanup_list at this point,
1898 // so it's not necessary to take any locks
1899 while (!_cleanup_list.is_empty()) {
1900 HeapRegion* hr = _cleanup_list.remove_head();
1901 assert(hr != NULL, "the list was not empty");
1902 hr->par_clear();
1903 tmp_free_list.add_as_tail(hr);
1904
1905 // Instead of adding one region at a time to the secondary_free_list,
1906 // we accumulate them in the local list and move them a few at a
1907 // time. This also cuts down on the number of notify_all() calls
1908 // we do during this process. We'll also append the local list when
1909 // _cleanup_list is empty (which means we just removed the last
1910 // region from the _cleanup_list).
1911 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1912 _cleanup_list.is_empty()) {
1913 if (G1ConcRegionFreeingVerbose) {
1914 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1915 "appending "SIZE_FORMAT" entries to the "
1916 "secondary_free_list, clean list still has "
1917 SIZE_FORMAT" entries",
1918 tmp_free_list.length(),
1919 _cleanup_list.length());
1920 }
1921
1922 {
1923 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1924 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1925 SecondaryFreeList_lock->notify_all();
1926 }
1927
1928 if (G1StressConcRegionFreeing) {
1929 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1930 os::sleep(Thread::current(), (jlong) 1, false);
1931 }
1932 }
1933 }
1934 }
1935 assert(tmp_free_list.is_empty(), "post-condition");
1936 }
1937
1938 // Support closures for reference processing in G1
1939
1940 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1941 HeapWord* addr = (HeapWord*)obj;
1942 return addr != NULL &&
1943 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1944 }
1945
1946 class G1CMKeepAliveClosure: public OopClosure {
1947 G1CollectedHeap* _g1;
1948 ConcurrentMark* _cm;
1949 CMBitMap* _bitMap;
1950 public:
1951 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1952 CMBitMap* bitMap) :
1953 _g1(g1), _cm(cm),
1954 _bitMap(bitMap) {}
1955
1956 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1957 virtual void do_oop( oop* p) { do_oop_work(p); }
1958
1959 template <class T> void do_oop_work(T* p) {
1960 oop obj = oopDesc::load_decode_heap_oop(p);
1961 HeapWord* addr = (HeapWord*)obj;
1962
1963 if (_cm->verbose_high())
1964 gclog_or_tty->print_cr("\t[0] we're looking at location "
1965 "*"PTR_FORMAT" = "PTR_FORMAT,
1966 p, (void*) obj);
1967
1968 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1969 _bitMap->mark(addr);
1970 _cm->mark_stack_push(obj);
1971 }
1972 }
1973 };
1974
1975 class G1CMDrainMarkingStackClosure: public VoidClosure {
1976 CMMarkStack* _markStack;
1977 CMBitMap* _bitMap;
1978 G1CMKeepAliveClosure* _oopClosure;
1979 public:
1980 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1981 G1CMKeepAliveClosure* oopClosure) :
1982 _bitMap(bitMap),
1983 _markStack(markStack),
1984 _oopClosure(oopClosure)
1985 {}
1986
1987 void do_void() {
1988 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1989 }
1990 };
1991
1992 // 'Keep Alive' closure used by parallel reference processing.
1993 // An instance of this closure is used in the parallel reference processing
1994 // code rather than an instance of G1CMKeepAliveClosure. We could have used
1995 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
1996 // placed onto discovered ref lists once so we can mark and push with no
1997 // need to check whether the object has already been marked. Using the
1998 // G1CMKeepAliveClosure would mean, however, having all the worker threads
1999 // operating on the global mark stack. This means that an individual
2000 // worker would be doing lock-free pushes while it processes its own
2001 // discovered ref list followed by a drain call. If the discovered ref lists
2002 // are unbalanced then this could cause interference with the other
2003 // workers. Using a CMTask (and its embedded local data structures)
2004 // avoids that potential interference.
2005 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2006 ConcurrentMark* _cm;
2007 CMTask* _task;
2008 CMBitMap* _bitMap;
2009 int _ref_counter_limit;
2010 int _ref_counter;
2011 public:
2012 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
2013 CMTask* task,
2014 CMBitMap* bitMap) :
2015 _cm(cm), _task(task), _bitMap(bitMap),
2016 _ref_counter_limit(G1RefProcDrainInterval)
2017 {
2018 assert(_ref_counter_limit > 0, "sanity");
2019 _ref_counter = _ref_counter_limit;
2020 }
2021
2022 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2023 virtual void do_oop( oop* p) { do_oop_work(p); }
2024
2025 template <class T> void do_oop_work(T* p) {
2026 if (!_cm->has_overflown()) {
2027 oop obj = oopDesc::load_decode_heap_oop(p);
2028 if (_cm->verbose_high())
2029 gclog_or_tty->print_cr("\t[%d] we're looking at location "
2030 "*"PTR_FORMAT" = "PTR_FORMAT,
2031 _task->task_id(), p, (void*) obj);
2032
2033 _task->deal_with_reference(obj);
2034 _ref_counter--;
2035
2036 if (_ref_counter == 0) {
2037 // We have dealt with _ref_counter_limit references, pushing them and objects
2038 // reachable from them on to the local stack (and possibly the global stack).
2039 // Call do_marking_step() to process these entries. We call the routine in a
2040 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2041 // with the entries that we've pushed as a result of the deal_with_reference
2042 // calls above) or we overflow.
2043 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2044 // while there may still be some work to do. (See the comment at the
2045 // beginning of CMTask::do_marking_step() for those conditions - one of which
2046 // is reaching the specified time target.) It is only when
2047 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2048 // that the marking has completed.
2049 do {
2050 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2051 _task->do_marking_step(mark_step_duration_ms,
2052 false /* do_stealing */,
2053 false /* do_termination */);
2054 } while (_task->has_aborted() && !_cm->has_overflown());
2055 _ref_counter = _ref_counter_limit;
2056 }
2057 } else {
2058 if (_cm->verbose_high())
2059 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2060 }
2061 }
2062 };
2063
2064 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2065 ConcurrentMark* _cm;
2066 CMTask* _task;
2067 public:
2068 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2069 _cm(cm), _task(task)
2070 {}
2071
2072 void do_void() {
2073 do {
2074 if (_cm->verbose_high())
2075 gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step", _task->task_id());
2076
2077 // We call CMTask::do_marking_step() to completely drain the local and
2078 // global marking stacks. The routine is called in a loop, which we'll
2079 // exit if there's nothing more to do (i.e. we've completely drained the
2080 // entries that were pushed as a result of applying the
2081 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2082 // lists above) or we overflow the global marking stack.
2083 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2084 // while there may still be some work to do. (See the comment at the
2085 // beginning of CMTask::do_marking_step() for those conditions - one of which
2086 // is reaching the specified time target.)
It is only when 2087 // CMTask::do_marking_step() returns without setting the has_aborted() flag 2088 // that the marking has completed. 2089 2090 _task->do_marking_step(1000000000.0 /* something very large */, 2091 true /* do_stealing */, 2092 true /* do_termination */); 2093 } while (_task->has_aborted() && !_cm->has_overflown()); 2094 } 2095 }; 2096 2097 // Implementation of AbstractRefProcTaskExecutor for G1 2098 class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2099 private: 2100 G1CollectedHeap* _g1h; 2101 ConcurrentMark* _cm; 2102 CMBitMap* _bitmap; 2103 WorkGang* _workers; 2104 int _active_workers; 2105 2106 public: 2107 G1RefProcTaskExecutor(G1CollectedHeap* g1h, 2108 ConcurrentMark* cm, 2109 CMBitMap* bitmap, 2110 WorkGang* workers, 2111 int n_workers) : 2112 _g1h(g1h), _cm(cm), _bitmap(bitmap), 2113 _workers(workers), _active_workers(n_workers) 2114 { } 2115 2116 // Executes the given task using concurrent marking worker threads. 2117 virtual void execute(ProcessTask& task); 2118 virtual void execute(EnqueueTask& task); 2119 }; 2120 2121 class G1RefProcTaskProxy: public AbstractGangTask { 2122 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2123 ProcessTask& _proc_task; 2124 G1CollectedHeap* _g1h; 2125 ConcurrentMark* _cm; 2126 CMBitMap* _bitmap; 2127 2128 public: 2129 G1RefProcTaskProxy(ProcessTask& proc_task, 2130 G1CollectedHeap* g1h, 2131 ConcurrentMark* cm, 2132 CMBitMap* bitmap) : 2133 AbstractGangTask("Process reference objects in parallel"), 2134 _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap) 2135 {} 2136 2137 virtual void work(int i) { 2138 CMTask* marking_task = _cm->task(i); 2139 G1CMIsAliveClosure g1_is_alive(_g1h); 2140 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap); 2141 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); 2142 2143 _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2144 } 2145 }; 2146 2147 void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) { 2148 assert(_workers != NULL, "Need parallel worker threads."); 2149 2150 G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap); 2151 2152 // We need to reset the phase for each task execution so that 2153 // the termination protocol of CMTask::do_marking_step works. 
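  // (Roughly: set_phase() re-initializes the active task count and the
  // parallel terminator, so the workers started below terminate against
  // the right worker count rather than one left over from the previous
  // marking phase.)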
2154 _cm->set_phase(_active_workers, false /* concurrent */); 2155 _g1h->set_par_threads(_active_workers); 2156 _workers->run_task(&proc_task_proxy); 2157 _g1h->set_par_threads(0); 2158 } 2159 2160 class G1RefEnqueueTaskProxy: public AbstractGangTask { 2161 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2162 EnqueueTask& _enq_task; 2163 2164 public: 2165 G1RefEnqueueTaskProxy(EnqueueTask& enq_task) : 2166 AbstractGangTask("Enqueue reference objects in parallel"), 2167 _enq_task(enq_task) 2168 { } 2169 2170 virtual void work(int i) { 2171 _enq_task.work(i); 2172 } 2173 }; 2174 2175 void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2176 assert(_workers != NULL, "Need parallel worker threads."); 2177 2178 G1RefEnqueueTaskProxy enq_task_proxy(enq_task); 2179 2180 _g1h->set_par_threads(_active_workers); 2181 _workers->run_task(&enq_task_proxy); 2182 _g1h->set_par_threads(0); 2183 } 2184 2185 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2186 ResourceMark rm; 2187 HandleMark hm; 2188 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2189 ReferenceProcessor* rp = g1h->ref_processor(); 2190 2191 // See the comment in G1CollectedHeap::ref_processing_init() 2192 // about how reference processing currently works in G1. 2193 2194 // Process weak references. 2195 rp->setup_policy(clear_all_soft_refs); 2196 assert(_markStack.isEmpty(), "mark stack should be empty"); 2197 2198 G1CMIsAliveClosure g1_is_alive(g1h); 2199 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap()); 2200 G1CMDrainMarkingStackClosure 2201 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive); 2202 // We use the work gang from the G1CollectedHeap and we utilize all 2203 // the worker threads. 2204 int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1; 2205 active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1); 2206 2207 G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(), 2208 g1h->workers(), active_workers); 2209 2210 2211 if (rp->processing_is_mt()) { 2212 // Set the degree of MT here. If the discovery is done MT, there 2213 // may have been a different number of threads doing the discovery 2214 // and a different number of discovered lists may have Ref objects. 2215 // That is OK as long as the Reference lists are balanced (see 2216 // balance_all_queues() and balance_queues()). 2217 rp->set_active_mt_degree(active_workers); 2218 2219 rp->process_discovered_references(&g1_is_alive, 2220 &g1_keep_alive, 2221 &g1_drain_mark_stack, 2222 &par_task_executor); 2223 2224 // The work routines of the parallel keep_alive and drain_marking_stack 2225 // will set the has_overflown flag if we overflow the global marking 2226 // stack. 2227 } else { 2228 rp->process_discovered_references(&g1_is_alive, 2229 &g1_keep_alive, 2230 &g1_drain_mark_stack, 2231 NULL); 2232 2233 } 2234 2235 assert(_markStack.overflow() || _markStack.isEmpty(), 2236 "mark stack should be empty (unless it overflowed)"); 2237 if (_markStack.overflow()) { 2238 // Should have been done already when we tried to push an 2239 // entry on to the global mark stack. But let's do it again. 
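  // (Setting the flag twice is harmless: it is sticky and is only
  // cleared in clear_marking_state(), when marking starts over.)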
2240 set_has_overflown();
2241 }
2242
2243 if (rp->processing_is_mt()) {
2244 assert(rp->num_q() == active_workers, "why not");
2245 rp->enqueue_discovered_references(&par_task_executor);
2246 } else {
2247 rp->enqueue_discovered_references();
2248 }
2249
2250 rp->verify_no_references_recorded();
2251 assert(!rp->discovery_enabled(), "should have been disabled");
2252
2253 // Now clean up stale oops in StringTable
2254 StringTable::unlink(&g1_is_alive);
2255 // Clean up unreferenced symbols in symbol table.
2256 SymbolTable::unlink();
2257 }
2258
2259 void ConcurrentMark::swapMarkBitMaps() {
2260 CMBitMapRO* temp = _prevMarkBitMap;
2261 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2262 _nextMarkBitMap = (CMBitMap*) temp;
2263 }
2264
2265 class CMRemarkTask: public AbstractGangTask {
2266 private:
2267 ConcurrentMark *_cm;
2268
2269 public:
2270 void work(int worker_i) {
2271 // Since all available tasks are actually started, we should
2272 // only proceed if we're supposed to be active.
2273 if ((size_t)worker_i < _cm->active_tasks()) {
2274 CMTask* task = _cm->task(worker_i);
2275 task->record_start_time();
2276 do {
2277 task->do_marking_step(1000000000.0 /* something very large */,
2278 true /* do_stealing */,
2279 true /* do_termination */);
2280 } while (task->has_aborted() && !_cm->has_overflown());
2281 // If we overflow, then we do not want to restart. We instead
2282 // want to abort remark and do concurrent marking again.
2283 task->record_end_time();
2284 }
2285 }
2286
2287 CMRemarkTask(ConcurrentMark* cm) :
2288 AbstractGangTask("Par Remark"), _cm(cm) { }
2289 };
2290
2291 void ConcurrentMark::checkpointRootsFinalWork() {
2292 ResourceMark rm;
2293 HandleMark hm;
2294 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2295
2296 g1h->ensure_parsability(false);
2297
2298 if (G1CollectedHeap::use_parallel_gc_threads()) {
2299 G1CollectedHeap::StrongRootsScope srs(g1h);
2300 // this is remark, so we'll use up all available threads
2301 int active_workers = ParallelGCThreads;
2302 set_phase(active_workers, false /* concurrent */);
2303
2304 CMRemarkTask remarkTask(this);
2305 // We will start all available threads, even if we decide that the
2306 // active_workers will be fewer. The extra ones will just bail out
2307 // immediately.
2308 int n_workers = g1h->workers()->total_workers();
2309 g1h->set_par_threads(n_workers);
2310 g1h->workers()->run_task(&remarkTask);
2311 g1h->set_par_threads(0);
2312 } else {
2313 G1CollectedHeap::StrongRootsScope srs(g1h);
2314 // this is remark, so we'll use up all available threads
2315 int active_workers = 1;
2316 set_phase(active_workers, false /* concurrent */);
2317
2318 CMRemarkTask remarkTask(this);
2319 // We will start all available threads, even if we decide that the
2320 // active_workers will be fewer. The extra ones will just bail out
2321 // immediately.
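    // (In this serial branch "all available threads" is just the calling
    // VM thread, so we invoke the task body directly.)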
2322 remarkTask.work(0); 2323 } 2324 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2325 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2326 2327 print_stats(); 2328 2329 #if VERIFY_OBJS_PROCESSED 2330 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2331 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2332 _scan_obj_cl.objs_processed, 2333 ThreadLocalObjQueue::objs_enqueued); 2334 guarantee(_scan_obj_cl.objs_processed == 2335 ThreadLocalObjQueue::objs_enqueued, 2336 "Different number of objs processed and enqueued."); 2337 } 2338 #endif 2339 } 2340 2341 #ifndef PRODUCT 2342 2343 class PrintReachableOopClosure: public OopClosure { 2344 private: 2345 G1CollectedHeap* _g1h; 2346 CMBitMapRO* _bitmap; 2347 outputStream* _out; 2348 bool _use_prev_marking; 2349 bool _all; 2350 2351 public: 2352 PrintReachableOopClosure(CMBitMapRO* bitmap, 2353 outputStream* out, 2354 bool use_prev_marking, 2355 bool all) : 2356 _g1h(G1CollectedHeap::heap()), 2357 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { } 2358 2359 void do_oop(narrowOop* p) { do_oop_work(p); } 2360 void do_oop( oop* p) { do_oop_work(p); } 2361 2362 template <class T> void do_oop_work(T* p) { 2363 oop obj = oopDesc::load_decode_heap_oop(p); 2364 const char* str = NULL; 2365 const char* str2 = ""; 2366 2367 if (obj == NULL) { 2368 str = ""; 2369 } else if (!_g1h->is_in_g1_reserved(obj)) { 2370 str = " O"; 2371 } else { 2372 HeapRegion* hr = _g1h->heap_region_containing(obj); 2373 guarantee(hr != NULL, "invariant"); 2374 bool over_tams = false; 2375 if (_use_prev_marking) { 2376 over_tams = hr->obj_allocated_since_prev_marking(obj); 2377 } else { 2378 over_tams = hr->obj_allocated_since_next_marking(obj); 2379 } 2380 bool marked = _bitmap->isMarked((HeapWord*) obj); 2381 2382 if (over_tams) { 2383 str = " >"; 2384 if (marked) { 2385 str2 = " AND MARKED"; 2386 } 2387 } else if (marked) { 2388 str = " M"; 2389 } else { 2390 str = " NOT"; 2391 } 2392 } 2393 2394 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2395 p, (void*) obj, str, str2); 2396 } 2397 }; 2398 2399 class PrintReachableObjectClosure : public ObjectClosure { 2400 private: 2401 CMBitMapRO* _bitmap; 2402 outputStream* _out; 2403 bool _use_prev_marking; 2404 bool _all; 2405 HeapRegion* _hr; 2406 2407 public: 2408 PrintReachableObjectClosure(CMBitMapRO* bitmap, 2409 outputStream* out, 2410 bool use_prev_marking, 2411 bool all, 2412 HeapRegion* hr) : 2413 _bitmap(bitmap), _out(out), 2414 _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { } 2415 2416 void do_object(oop o) { 2417 bool over_tams; 2418 if (_use_prev_marking) { 2419 over_tams = _hr->obj_allocated_since_prev_marking(o); 2420 } else { 2421 over_tams = _hr->obj_allocated_since_next_marking(o); 2422 } 2423 bool marked = _bitmap->isMarked((HeapWord*) o); 2424 bool print_it = _all || over_tams || marked; 2425 2426 if (print_it) { 2427 _out->print_cr(" "PTR_FORMAT"%s", 2428 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2429 PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all); 2430 o->oop_iterate(&oopCl); 2431 } 2432 } 2433 }; 2434 2435 class PrintReachableRegionClosure : public HeapRegionClosure { 2436 private: 2437 CMBitMapRO* _bitmap; 2438 outputStream* _out; 2439 bool _use_prev_marking; 2440 bool _all; 2441 2442 public: 2443 bool doHeapRegion(HeapRegion* hr) { 2444 HeapWord* b = hr->bottom(); 2445 HeapWord* e = hr->end(); 2446 HeapWord* t = hr->top(); 2447 HeapWord* p = NULL; 2448 if (_use_prev_marking) { 2449 p = hr->prev_top_at_mark_start(); 2450 } else { 2451 p = hr->next_top_at_mark_start(); 2452 } 2453 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2454 "TAMS: "PTR_FORMAT, b, e, t, p); 2455 _out->cr(); 2456 2457 HeapWord* from = b; 2458 HeapWord* to = t; 2459 2460 if (to > from) { 2461 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2462 _out->cr(); 2463 PrintReachableObjectClosure ocl(_bitmap, _out, 2464 _use_prev_marking, _all, hr); 2465 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2466 _out->cr(); 2467 } 2468 2469 return false; 2470 } 2471 2472 PrintReachableRegionClosure(CMBitMapRO* bitmap, 2473 outputStream* out, 2474 bool use_prev_marking, 2475 bool all) : 2476 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { } 2477 }; 2478 2479 void ConcurrentMark::print_reachable(const char* str, 2480 bool use_prev_marking, 2481 bool all) { 2482 gclog_or_tty->cr(); 2483 gclog_or_tty->print_cr("== Doing heap dump... "); 2484 2485 if (G1PrintReachableBaseFile == NULL) { 2486 gclog_or_tty->print_cr(" #### error: no base file defined"); 2487 return; 2488 } 2489 2490 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2491 (JVM_MAXPATHLEN - 1)) { 2492 gclog_or_tty->print_cr(" #### error: file name too long"); 2493 return; 2494 } 2495 2496 char file_name[JVM_MAXPATHLEN]; 2497 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2498 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2499 2500 fileStream fout(file_name); 2501 if (!fout.is_open()) { 2502 gclog_or_tty->print_cr(" #### error: could not open file"); 2503 return; 2504 } 2505 2506 outputStream* out = &fout; 2507 2508 CMBitMapRO* bitmap = NULL; 2509 if (use_prev_marking) { 2510 bitmap = _prevMarkBitMap; 2511 } else { 2512 bitmap = _nextMarkBitMap; 2513 } 2514 2515 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS"); 2516 out->cr(); 2517 2518 out->print_cr("--- ITERATING OVER REGIONS"); 2519 out->cr(); 2520 PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all); 2521 _g1h->heap_region_iterate(&rcl); 2522 out->cr(); 2523 2524 gclog_or_tty->print_cr(" done"); 2525 gclog_or_tty->flush(); 2526 } 2527 2528 #endif // PRODUCT 2529 2530 // This note is for drainAllSATBBuffers and the code in between. 2531 // In the future we could reuse a task to do this work during an 2532 // evacuation pause (since now tasks are not active and can be claimed 2533 // during an evacuation pause). This was a late change to the code and 2534 // is currently not being taken advantage of. 
2535
2536 class CMGlobalObjectClosure : public ObjectClosure {
2537 private:
2538 ConcurrentMark* _cm;
2539
2540 public:
2541 void do_object(oop obj) {
2542 _cm->deal_with_reference(obj);
2543 }
2544
2545 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2546 };
2547
2548 void ConcurrentMark::deal_with_reference(oop obj) {
2549 if (verbose_high())
2550 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2551 (void*) obj);
2552
2553
2554 HeapWord* objAddr = (HeapWord*) obj;
2555 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2556 if (_g1h->is_in_g1_reserved(objAddr)) {
2557 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2558 HeapRegion* hr = _g1h->heap_region_containing(obj);
2559 if (_g1h->is_obj_ill(obj, hr)) {
2560 if (verbose_high())
2561 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2562 "marked", (void*) obj);
2563
2564 // we need to mark it first
2565 if (_nextMarkBitMap->parMark(objAddr)) {
2566 // No OrderAccess::store_load() is needed. It is implicit in the
2567 // CAS done in parMark(objAddr) above
2568 HeapWord* finger = _finger;
2569 if (objAddr < finger) {
2570 if (verbose_high())
2571 gclog_or_tty->print_cr("[global] below the global finger "
2572 "("PTR_FORMAT"), pushing it", finger);
2573 if (!mark_stack_push(obj)) {
2574 if (verbose_low())
2575 gclog_or_tty->print_cr("[global] global stack overflow during "
2576 "deal_with_reference");
2577 }
2578 }
2579 }
2580 }
2581 }
2582 }
2583
2584 void ConcurrentMark::drainAllSATBBuffers() {
2585 CMGlobalObjectClosure oc(this);
2586 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2587 satb_mq_set.set_closure(&oc);
2588
2589 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2590 if (verbose_medium())
2591 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2592 }
2593
2594 // no need to check whether we should do this, as this is only
2595 // called during an evacuation pause
2596 satb_mq_set.iterate_closure_all_threads();
2597
2598 satb_mq_set.set_closure(NULL);
2599 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2600 }
2601
2602 void ConcurrentMark::markPrev(oop p) {
2603 // Note we are overriding the read-only view of the prev map here, via
2604 // the cast.
2605 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2606 }
2607
2608 void ConcurrentMark::clear(oop p) {
2609 assert(p != NULL && p->is_oop(), "expected an oop");
2610 HeapWord* addr = (HeapWord*)p;
2611 assert(addr >= _nextMarkBitMap->startWord() &&
2612 addr < _nextMarkBitMap->endWord(), "in a region");
2613
2614 _nextMarkBitMap->clear(addr);
2615 }
2616
2617 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2618 // Note we are overriding the read-only view of the prev map here, via
2619 // the cast.
2620 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2621 _nextMarkBitMap->clearRange(mr);
2622 }
2623
2624 HeapRegion*
2625 ConcurrentMark::claim_region(int task_num) {
2626 // "checkpoint" the finger
2627 HeapWord* finger = _finger;
2628
2629 // _heap_end will not change underneath our feet; it only changes at
2630 // yield points.
2631 while (finger < _heap_end) {
2632 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2633
2634 // is the gap between reading the finger and doing the CAS too long?
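    // (It can be: another task may claim the region first. That is
    // benign, though -- the CAS below then fails, we observe that the
    // finger has moved, and we simply re-read it and retry.)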
2635
2636 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2637 HeapWord* bottom = curr_region->bottom();
2638 HeapWord* end = curr_region->end();
2639 HeapWord* limit = curr_region->next_top_at_mark_start();
2640
2641 if (verbose_low())
2642 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2643 "["PTR_FORMAT", "PTR_FORMAT"), "
2644 "limit = "PTR_FORMAT,
2645 task_num, curr_region, bottom, end, limit);
2646
2647 HeapWord* res =
2648 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2649 if (res == finger) {
2650 // we succeeded
2651
2652 // notice that _finger == end cannot be guaranteed here since
2653 // someone else might have moved the finger even further
2654 assert(_finger >= end, "the finger should have moved forward");
2655
2656 if (verbose_low())
2657 gclog_or_tty->print_cr("[%d] we were successful with region = "
2658 PTR_FORMAT, task_num, curr_region);
2659
2660 if (limit > bottom) {
2661 if (verbose_low())
2662 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2663 "returning it ", task_num, curr_region);
2664 return curr_region;
2665 } else {
2666 assert(limit == bottom,
2667 "the region limit should be at bottom");
2668 if (verbose_low())
2669 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2670 "returning NULL", task_num, curr_region);
2671 // we return NULL and the caller should try calling
2672 // claim_region() again.
2673 return NULL;
2674 }
2675 } else {
2676 assert(_finger > finger, "the finger should have moved forward");
2677 if (verbose_low())
2678 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2679 "global finger = "PTR_FORMAT", "
2680 "our finger = "PTR_FORMAT,
2681 task_num, _finger, finger);
2682
2683 // read it again
2684 finger = _finger;
2685 }
2686 }
2687
2688 return NULL;
2689 }
2690
2691 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2692 bool result = false;
2693 for (int i = 0; i < (int)_max_task_num; ++i) {
2694 CMTask* the_task = _tasks[i];
2695 MemRegion mr = the_task->aborted_region();
2696 if (mr.start() != NULL) {
2697 assert(mr.end() != NULL, "invariant");
2698 assert(mr.word_size() > 0, "invariant");
2699 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2700 assert(hr != NULL, "invariant");
2701 if (hr->in_collection_set()) {
2702 // The region points into the collection set
2703 the_task->set_aborted_region(MemRegion());
2704 result = true;
2705 }
2706 }
2707 }
2708 return result;
2709 }
2710
2711 bool ConcurrentMark::has_aborted_regions() {
2712 for (int i = 0; i < (int)_max_task_num; ++i) {
2713 CMTask* the_task = _tasks[i];
2714 MemRegion mr = the_task->aborted_region();
2715 if (mr.start() != NULL) {
2716 assert(mr.end() != NULL, "invariant");
2717 assert(mr.word_size() > 0, "invariant");
2718 return true;
2719 }
2720 }
2721 return false;
2722 }
2723
2724 void ConcurrentMark::oops_do(OopClosure* cl) {
2725 if (_markStack.size() > 0 && verbose_low())
2726 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2727 "size = %d", _markStack.size());
2728 // we first iterate over the contents of the mark stack...
2729 _markStack.oops_do(cl);
2730
2731 for (int i = 0; i < (int)_max_task_num; ++i) {
2732 OopTaskQueue* queue = _task_queues->queue((int)i);
2733
2734 if (queue->size() > 0 && verbose_low())
2735 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2736 "size = %d", i, queue->size());
2737
2738 // ...then over the contents of all the task queues.
2739 queue->oops_do(cl);
2740 }
2741
2742 // Invalidate any entries in the region stack that
2743 // point into the collection set
2744 if (_regionStack.invalidate_entries_into_cset()) {
2745 // otherwise, any gray objects copied during the evacuation pause
2746 // might not be visited.
2747 assert(_should_gray_objects, "invariant");
2748 }
2749
2750 // Invalidate any aborted regions, recorded in the individual CM
2751 // tasks, that point into the collection set.
2752 if (invalidate_aborted_regions_in_cset()) {
2753 // otherwise, any gray objects copied during the evacuation pause
2754 // might not be visited.
2755 assert(_should_gray_objects, "invariant");
2756 }
2757
2758 }
2759
2760 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2761 _markStack.setEmpty();
2762 _markStack.clear_overflow();
2763 _regionStack.setEmpty();
2764 _regionStack.clear_overflow();
2765 if (clear_overflow) {
2766 clear_has_overflown();
2767 } else {
2768 assert(has_overflown(), "pre-condition");
2769 }
2770 _finger = _heap_start;
2771
2772 for (int i = 0; i < (int)_max_task_num; ++i) {
2773 OopTaskQueue* queue = _task_queues->queue(i);
2774 queue->set_empty();
2775 // Clear any partial regions from the CMTasks
2776 _tasks[i]->clear_aborted_region();
2777 }
2778 }
2779
2780 void ConcurrentMark::print_stats() {
2781 if (verbose_stats()) {
2782 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2783 for (size_t i = 0; i < _active_tasks; ++i) {
2784 _tasks[i]->print_stats();
2785 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2786 }
2787 }
2788 }
2789
2790 class CSMarkOopClosure: public OopClosure {
2791 friend class CSMarkBitMapClosure;
2792
2793 G1CollectedHeap* _g1h;
2794 CMBitMap* _bm;
2795 ConcurrentMark* _cm;
2796 oop* _ms;
2797 jint* _array_ind_stack;
2798 int _ms_size;
2799 int _ms_ind;
2800 int _array_increment;
2801
2802 bool push(oop obj, int arr_ind = 0) {
2803 if (_ms_ind == _ms_size) {
2804 gclog_or_tty->print_cr("Mark stack is full.");
2805 return false;
2806 }
2807 _ms[_ms_ind] = obj;
2808 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2809 _ms_ind++;
2810 return true;
2811 }
2812
2813 oop pop() {
2814 if (_ms_ind == 0) return NULL;
2815 else {
2816 _ms_ind--;
2817 return _ms[_ms_ind];
2818 }
2819 }
2820
2821 template <class T> bool drain() {
2822 while (_ms_ind > 0) {
2823 oop obj = pop();
2824 assert(obj != NULL, "Since index was non-zero.");
2825 if (obj->is_objArray()) {
2826 jint arr_ind = _array_ind_stack[_ms_ind];
2827 objArrayOop aobj = objArrayOop(obj);
2828 jint len = aobj->length();
2829 jint next_arr_ind = arr_ind + _array_increment;
2830 if (next_arr_ind < len) {
2831 push(obj, next_arr_ind);
2832 }
2833 // Now process this portion of this one.
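      // (E.g. with _array_increment == 16 and len == 40, the array is
      // scanned in slices [0,16), [16,32), [32,40): the array was
      // re-pushed above with the next start index before we scan the
      // current slice, so the remainder is picked up by a later pop.)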
2834 int lim = MIN2(next_arr_ind, len);
2835 for (int j = arr_ind; j < lim; j++) {
2836 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2837 }
2838
2839 } else {
2840 obj->oop_iterate(this);
2841 }
2842 if (abort()) return false;
2843 }
2844 return true;
2845 }
2846
2847 public:
2848 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2849 _g1h(G1CollectedHeap::heap()),
2850 _cm(cm),
2851 _bm(cm->nextMarkBitMap()),
2852 _ms_size(ms_size), _ms_ind(0),
2853 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2854 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2855 _array_increment(MAX2(ms_size/8, 16))
2856 {}
2857
2858 ~CSMarkOopClosure() {
2859 FREE_C_HEAP_ARRAY(oop, _ms);
2860 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2861 }
2862
2863 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2864 virtual void do_oop( oop* p) { do_oop_work(p); }
2865
2866 template <class T> void do_oop_work(T* p) {
2867 T heap_oop = oopDesc::load_heap_oop(p);
2868 if (oopDesc::is_null(heap_oop)) return;
2869 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2870 if (obj->is_forwarded()) {
2871 // If the object has already been forwarded, we have to make sure
2872 // that it's marked. So follow the forwarding pointer. Note that
2873 // this does the right thing for self-forwarding pointers in the
2874 // evacuation failure case.
2875 obj = obj->forwardee();
2876 }
2877 HeapRegion* hr = _g1h->heap_region_containing(obj);
2878 if (hr != NULL) {
2879 if (hr->in_collection_set()) {
2880 if (_g1h->is_obj_ill(obj)) {
2881 _bm->mark((HeapWord*)obj);
2882 if (!push(obj)) {
2883 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2884 set_abort();
2885 }
2886 }
2887 } else {
2888 // Outside the collection set; we need to gray it
2889 _cm->deal_with_reference(obj);
2890 }
2891 }
2892 }
2893 };
2894
2895 class CSMarkBitMapClosure: public BitMapClosure {
2896 G1CollectedHeap* _g1h;
2897 CMBitMap* _bitMap;
2898 ConcurrentMark* _cm;
2899 CSMarkOopClosure _oop_cl;
2900 public:
2901 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2902 _g1h(G1CollectedHeap::heap()),
2903 _bitMap(cm->nextMarkBitMap()),
2904 _oop_cl(cm, ms_size)
2905 {}
2906
2907 ~CSMarkBitMapClosure() {}
2908
2909 bool do_bit(size_t offset) {
2910 // convert offset into a HeapWord*
2911 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2912 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2913 "address out of range");
2914 assert(_bitMap->isMarked(addr), "tautology");
2915 oop obj = oop(addr);
2916 if (!obj->is_forwarded()) {
2917 if (!_oop_cl.push(obj)) return false;
2918 if (UseCompressedOops) {
2919 if (!_oop_cl.drain<narrowOop>()) return false;
2920 } else {
2921 if (!_oop_cl.drain<oop>()) return false;
2922 }
2923 }
2924 // Otherwise...
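    // (i.e. the object has been forwarded: its new copy, not this one,
    // is the interesting object, and that copy is dealt with when the
    // references to it are followed by CSMarkOopClosure above.)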
2925 return true;
2926 }
2927 };
2928
2929
2930 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2931 CMBitMap* _bm;
2932 CSMarkBitMapClosure _bit_cl;
2933 enum SomePrivateConstants {
2934 MSSize = 1000
2935 };
2936 bool _completed;
2937 public:
2938 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2939 _bm(cm->nextMarkBitMap()),
2940 _bit_cl(cm, MSSize),
2941 _completed(true)
2942 {}
2943
2944 ~CompleteMarkingInCSHRClosure() {}
2945
2946 bool doHeapRegion(HeapRegion* r) {
2947 if (!r->evacuation_failed()) {
2948 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2949 if (!mr.is_empty()) {
2950 if (!_bm->iterate(&_bit_cl, mr)) {
2951 _completed = false;
2952 return true;
2953 }
2954 }
2955 }
2956 return false;
2957 }
2958
2959 bool completed() { return _completed; }
2960 };
2961
2962 class ClearMarksInHRClosure: public HeapRegionClosure {
2963 CMBitMap* _bm;
2964 public:
2965 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2966
2967 bool doHeapRegion(HeapRegion* r) {
2968 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2969 MemRegion usedMR = r->used_region();
2970 _bm->clearRange(usedMR);
2971 }
2972 return false;
2973 }
2974 };
2975
2976 void ConcurrentMark::complete_marking_in_collection_set() {
2977 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2978
2979 if (!g1h->mark_in_progress()) {
2980 g1h->g1_policy()->record_mark_closure_time(0.0);
2981 return;
2982 }
2983
2984 int i = 1;
2985 double start = os::elapsedTime();
2986 while (true) {
2987 i++;
2988 CompleteMarkingInCSHRClosure cmplt(this);
2989 g1h->collection_set_iterate(&cmplt);
2990 if (cmplt.completed()) break;
2991 }
2992 double end_time = os::elapsedTime();
2993 double elapsed_time_ms = (end_time - start) * 1000.0;
2994 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2995
2996 ClearMarksInHRClosure clr(nextMarkBitMap());
2997 g1h->collection_set_iterate(&clr);
2998 }
2999
3000 // The next two methods deal with the following optimisation. Some
3001 // objects are gray by being marked and located above the finger. If
3002 // they are copied, during an evacuation pause, below the finger then
3003 // they need to be pushed on the stack. The observation is that, if
3004 // there are no regions in the collection set located above the
3005 // finger, then the above cannot happen, hence we do not need to
3006 // explicitly gray any objects when copying them to below the
3007 // finger. The global stack will be scanned to ensure that, if it
3008 // points to objects being copied, it will update their
3009 // location. There is a tricky situation with the gray objects in
3010 // region stack that are being copied, however. See the comment in
3011 // newCSet().
3012
3013 void ConcurrentMark::newCSet() {
3014 if (!concurrent_marking_in_progress())
3015 // nothing to do if marking is not in progress
3016 return;
3017
3018 // find what the lowest finger is among the global and local fingers
3019 _min_finger = _finger;
3020 for (int i = 0; i < (int)_max_task_num; ++i) {
3021 CMTask* task = _tasks[i];
3022 HeapWord* task_finger = task->finger();
3023 if (task_finger != NULL && task_finger < _min_finger)
3024 _min_finger = task_finger;
3025 }
3026
3027 _should_gray_objects = false;
3028
3029 // This fixes a very subtle and frustrating bug. It might be the case
3030 // that, during an evacuation pause, heap regions that contain
3031 // objects that are gray (by being in regions contained in the
3032 // region stack) are included in the collection set. Since such gray
3033 // objects will be moved, and because it's not easy to redirect
3034 // region stack entries to point to a new location (because objects
3035 // in one region might be scattered to multiple regions after they
3036 // are copied), one option is to ensure that all marked objects
3037 // copied during a pause are pushed on the stack. Notice, however,
3038 // that this problem can only happen when the region stack is not
3039 // empty during an evacuation pause. So, we make the fix a bit less
3040 // conservative and ensure that regions are pushed on the stack,
3041 // irrespective of whether all collection set regions are below the
3042 // finger, if the region stack is not empty. This is expected to be
3043 // a rare case, so I don't think it's necessary to be smarter about it.
3044 if (!region_stack_empty() || has_aborted_regions())
3045 _should_gray_objects = true;
3046 }
3047
3048 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3049 if (!concurrent_marking_in_progress())
3050 return;
3051
3052 HeapWord* region_end = hr->end();
3053 if (region_end > _min_finger)
3054 _should_gray_objects = true;
3055 }
3056
3057 // abandon current marking iteration due to a Full GC
3058 void ConcurrentMark::abort() {
3059 // Clear all marks to force marking thread to do nothing
3060 _nextMarkBitMap->clearAll();
3061 // Empty mark stack
3062 clear_marking_state();
3063 for (int i = 0; i < (int)_max_task_num; ++i) {
3064 _tasks[i]->clear_region_fields();
3065 }
3066 _has_aborted = true;
3067
3068 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3069 satb_mq_set.abandon_partial_marking();
3070 // This can be called either during or outside marking; we'll read
3071 // the expected_active value from the SATB queue set.
3072 satb_mq_set.set_active_all_threads(
3073 false, /* new active value */
3074 satb_mq_set.is_active() /* expected_active */);
3075 }
3076
3077 static void print_ms_time_info(const char* prefix, const char* name,
3078 NumberSeq& ns) {
3079 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3080 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3081 if (ns.num() > 0) {
3082 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3083 prefix, ns.sd(), ns.maximum());
3084 }
3085 }
3086
3087 void ConcurrentMark::print_summary_info() {
3088 gclog_or_tty->print_cr(" Concurrent marking:");
3089 print_ms_time_info(" ", "init marks", _init_times);
3090 print_ms_time_info(" ", "remarks", _remark_times);
3091 {
3092 print_ms_time_info(" ", "final marks", _remark_mark_times);
3093 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3094
3095 }
3096 print_ms_time_info(" ", "cleanups", _cleanup_times);
3097 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3098 _total_counting_time,
3099 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3100 (double)_cleanup_times.num()
3101 : 0.0));
3102 if (G1ScrubRemSets) {
3103 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3104 _total_rs_scrub_time,
3105 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3106 (double)_cleanup_times.num()
3107 : 0.0));
3108 }
3109 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3110 (_init_times.sum() + _remark_times.sum() +
3111 _cleanup_times.sum())/1000.0);
3112 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3113 "(%8.2f s marking, %8.2f s counting).",
3114 cmThread()->vtime_accum(),
3115 cmThread()->vtime_mark_accum(),
3116 cmThread()->vtime_count_accum());
3117 }
3118
3119 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3120 _parallel_workers->print_worker_threads_on(st);
3121 }
3122
3123 // Closures
3124 // XXX: there seems to be a lot of code duplication here;
3125 // should refactor and consolidate the shared code.
3126
3127
3128
3129
3130 // We take a break if someone is trying to stop the world.
3131 bool ConcurrentMark::do_yield_check(int worker_i) {
3132 if (should_yield()) {
3133 if (worker_i == 0)
3134 _g1h->g1_policy()->record_concurrent_pause();
3135 cmThread()->yield();
3136 if (worker_i == 0)
3137 _g1h->g1_policy()->record_concurrent_pause_end();
3138 return true;
3139 } else {
3140 return false;
3141 }
3142 }
3143
3144 bool ConcurrentMark::should_yield() {
3145 return cmThread()->should_yield();
3146 }
3147
3148 bool ConcurrentMark::containing_card_is_marked(void* p) {
3149 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3150 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3151 }
3152
3153 bool ConcurrentMark::containing_cards_are_marked(void* start,
3154 void* last) {
3155 return
3156 containing_card_is_marked(start) &&
3157 containing_card_is_marked(last);
3158 }
3159
3160 #ifndef PRODUCT
3161 // for debugging purposes
3162 void ConcurrentMark::print_finger() {
3163 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3164 _heap_start, _heap_end, _finger);
3165 for (int i = 0; i < (int) _max_task_num; ++i) {
3166 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
3167 }
3168 gclog_or_tty->print_cr("");
3169 }
3170 #endif
3171
3172 // Closure for iteration over bitmaps
3173 class CMBitMapClosure : public BitMapClosure {
3174 private:
3175 // the bitmap that is being iterated over
3176 CMBitMap* _nextMarkBitMap;
3177 ConcurrentMark* _cm;
3178 CMTask* _task;
3179 // true if we're scanning a heap region claimed by the task (so that
3180 // we move the finger along), false if we're not, i.e. currently when
3181 // scanning a heap region popped from the region stack (so that we
3182 // do not move the task finger along; it'd be a mistake if we did so).
3183 bool _scanning_heap_region;
3184
3185 public:
3186 CMBitMapClosure(CMTask *task,
3187 ConcurrentMark* cm,
3188 CMBitMap* nextMarkBitMap)
3189 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3190
3191 void set_scanning_heap_region(bool scanning_heap_region) {
3192 _scanning_heap_region = scanning_heap_region;
3193 }
3194
3195 bool do_bit(size_t offset) {
3196 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3197 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3198 assert( addr < _cm->finger(), "invariant");
3199
3200 if (_scanning_heap_region) {
3201 statsOnly( _task->increase_objs_found_on_bitmap() );
3202 assert(addr >= _task->finger(), "invariant");
3203 // We move that task's local finger along.
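      // (Invariant sketch: everything on the bitmap below the local
      // finger has either been scanned already or pushed on a stack, so
      // advancing the finger here is what lets deal_with_reference()
      // skip objects at or above it.)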
3204 _task->move_finger_to(addr); 3205 } else { 3206 // We move the task's region finger along. 3207 _task->move_region_finger_to(addr); 3208 } 3209 3210 _task->scan_object(oop(addr)); 3211 // we only partially drain the local queue and global stack 3212 _task->drain_local_queue(true); 3213 _task->drain_global_stack(true); 3214 3215 // if the has_aborted flag has been raised, we need to bail out of 3216 // the iteration 3217 return !_task->has_aborted(); 3218 } 3219 }; 3220 3221 // Closure for iterating over objects, currently only used for 3222 // processing SATB buffers. 3223 class CMObjectClosure : public ObjectClosure { 3224 private: 3225 CMTask* _task; 3226 3227 public: 3228 void do_object(oop obj) { 3229 _task->deal_with_reference(obj); 3230 } 3231 3232 CMObjectClosure(CMTask* task) : _task(task) { } 3233 }; 3234 3235 // Closure for iterating over object fields 3236 class CMOopClosure : public OopClosure { 3237 private: 3238 G1CollectedHeap* _g1h; 3239 ConcurrentMark* _cm; 3240 CMTask* _task; 3241 3242 public: 3243 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 3244 virtual void do_oop( oop* p) { do_oop_work(p); } 3245 3246 template <class T> void do_oop_work(T* p) { 3247 assert( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant"); 3248 assert(!_g1h->is_on_master_free_list( 3249 _g1h->heap_region_containing((HeapWord*) p)), "invariant"); 3250 3251 oop obj = oopDesc::load_decode_heap_oop(p); 3252 if (_cm->verbose_high()) 3253 gclog_or_tty->print_cr("[%d] we're looking at location " 3254 "*"PTR_FORMAT" = "PTR_FORMAT, 3255 _task->task_id(), p, (void*) obj); 3256 _task->deal_with_reference(obj); 3257 } 3258 3259 CMOopClosure(G1CollectedHeap* g1h, 3260 ConcurrentMark* cm, 3261 CMTask* task) 3262 : _g1h(g1h), _cm(cm), _task(task) 3263 { 3264 assert(_ref_processor == NULL, "should be initialized to NULL"); 3265 3266 if (G1UseConcMarkReferenceProcessing) { 3267 _ref_processor = g1h->ref_processor(); 3268 assert(_ref_processor != NULL, "should not be NULL"); 3269 } 3270 } 3271 }; 3272 3273 void CMTask::setup_for_region(HeapRegion* hr) { 3274 // Separated the asserts so that we know which one fires. 3275 assert(hr != NULL, 3276 "claim_region() should have filtered out continues humongous regions"); 3277 assert(!hr->continuesHumongous(), 3278 "claim_region() should have filtered out continues humongous regions"); 3279 3280 if (_cm->verbose_low()) 3281 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 3282 _task_id, hr); 3283 3284 _curr_region = hr; 3285 _finger = hr->bottom(); 3286 update_region_limit(); 3287 } 3288 3289 void CMTask::update_region_limit() { 3290 HeapRegion* hr = _curr_region; 3291 HeapWord* bottom = hr->bottom(); 3292 HeapWord* limit = hr->next_top_at_mark_start(); 3293 3294 if (limit == bottom) { 3295 if (_cm->verbose_low()) 3296 gclog_or_tty->print_cr("[%d] found an empty region " 3297 "["PTR_FORMAT", "PTR_FORMAT")", 3298 _task_id, bottom, limit); 3299 // The region was collected underneath our feet. 3300 // We set the finger to bottom to ensure that the bitmap 3301 // iteration that will follow this will not do anything. 3302 // (this is not a condition that holds when we set the region up, 3303 // as the region is not supposed to be empty in the first place) 3304 _finger = bottom; 3305 } else if (limit >= _region_limit) { 3306 assert(limit >= _finger, "peace of mind"); 3307 } else { 3308 assert(limit < _region_limit, "only way to get here"); 3309 // This can happen under some pretty unusual circumstances. 
An 3310 // evacuation pause empties the region underneath our feet (NTAMS 3311 // at bottom). We then do some allocation in the region (NTAMS 3312 // stays at bottom), followed by the region being used as a GC 3313 // alloc region (NTAMS will move to top() and the objects 3314 // originally below it will be grayed). All objects now marked in 3315 // the region are explicitly grayed, if below the global finger, 3316 // and we do not need in fact to scan anything else. So, we simply 3317 // set _finger to be limit to ensure that the bitmap iteration 3318 // doesn't do anything. 3319 _finger = limit; 3320 } 3321 3322 _region_limit = limit; 3323 } 3324 3325 void CMTask::giveup_current_region() { 3326 assert(_curr_region != NULL, "invariant"); 3327 if (_cm->verbose_low()) 3328 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 3329 _task_id, _curr_region); 3330 clear_region_fields(); 3331 } 3332 3333 void CMTask::clear_region_fields() { 3334 // Values for these three fields that indicate that we're not 3335 // holding on to a region. 3336 _curr_region = NULL; 3337 _finger = NULL; 3338 _region_limit = NULL; 3339 3340 _region_finger = NULL; 3341 } 3342 3343 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3344 guarantee(nextMarkBitMap != NULL, "invariant"); 3345 3346 if (_cm->verbose_low()) 3347 gclog_or_tty->print_cr("[%d] resetting", _task_id); 3348 3349 _nextMarkBitMap = nextMarkBitMap; 3350 clear_region_fields(); 3351 assert(_aborted_region.is_empty(), "should have been cleared"); 3352 3353 _calls = 0; 3354 _elapsed_time_ms = 0.0; 3355 _termination_time_ms = 0.0; 3356 _termination_start_time_ms = 0.0; 3357 3358 #if _MARKING_STATS_ 3359 _local_pushes = 0; 3360 _local_pops = 0; 3361 _local_max_size = 0; 3362 _objs_scanned = 0; 3363 _global_pushes = 0; 3364 _global_pops = 0; 3365 _global_max_size = 0; 3366 _global_transfers_to = 0; 3367 _global_transfers_from = 0; 3368 _region_stack_pops = 0; 3369 _regions_claimed = 0; 3370 _objs_found_on_bitmap = 0; 3371 _satb_buffers_processed = 0; 3372 _steal_attempts = 0; 3373 _steals = 0; 3374 _aborted = 0; 3375 _aborted_overflow = 0; 3376 _aborted_cm_aborted = 0; 3377 _aborted_yield = 0; 3378 _aborted_timed_out = 0; 3379 _aborted_satb = 0; 3380 _aborted_termination = 0; 3381 #endif // _MARKING_STATS_ 3382 } 3383 3384 bool CMTask::should_exit_termination() { 3385 regular_clock_call(); 3386 // This is called when we are in the termination protocol. We should 3387 // quit if, for some reason, this task wants to abort or the global 3388 // stack is not empty (this means that we can get work from it). 3389 return !_cm->mark_stack_empty() || has_aborted(); 3390 } 3391 3392 // This determines whether the method below will check both the local 3393 // and global fingers when determining whether to push on the stack a 3394 // gray object (value 1) or whether it will only check the global one 3395 // (value 0). The tradeoffs are that the former will be a bit more 3396 // accurate and possibly push less on the stack, but it might also be 3397 // a little bit slower. 
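// For instance, with _CHECK_BOTH_FINGERS_ == 1, an object that lies in
// the task's current region below _region_limit but at or above the
// local finger is not pushed at all -- the bitmap iteration will get to
// it -- whereas checking only the global finger would push it.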
#define _CHECK_BOTH_FINGERS_ 1

void CMTask::deal_with_reference(oop obj) {
  if (_cm->verbose_high())
    gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
                           _task_id, (void*) obj);

  ++_refs_reached;

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "is_in_g1_reserved should ensure this");
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (_g1h->is_obj_ill(obj, hr)) {
      if (_cm->verbose_high())
        gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
                               _task_id, (void*) obj);

      // we need to mark it first
      if (_nextMarkBitMap->parMark(objAddr)) {
        // No OrderAccess::store_load() is needed. It is implicit in the
        // CAS done in parMark(objAddr) above
        HeapWord* global_finger = _cm->finger();

#if _CHECK_BOTH_FINGERS_
        // we will check both the local and global fingers

        if (_finger != NULL && objAddr < _finger) {
          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
                                   "pushing it", _task_id, _finger);
          push(obj);
        } else if (_curr_region != NULL && objAddr < _region_limit) {
          // do nothing
        } else if (objAddr < global_finger) {
          // Notice that the global finger might be moving forward
          // concurrently. This is not a problem. In the worst case, we
          // mark the object while it is above the global finger and, by
          // the time we read the global finger, it has moved forward
          // past this object. In this case, the object will probably
          // be visited when a task is scanning the region and will also
          // be pushed on the stack. So, some duplicate work, but no
          // correctness problems.

          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the global finger "
                                   "("PTR_FORMAT"), pushing it",
                                   _task_id, global_finger);
          push(obj);
        } else {
          // do nothing
        }
#else // _CHECK_BOTH_FINGERS_
        // we will only check the global finger

        if (objAddr < global_finger) {
          // see long comment above

          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the global finger "
                                   "("PTR_FORMAT"), pushing it",
                                   _task_id, global_finger);
          push(obj);
        }
#endif // _CHECK_BOTH_FINGERS_
      }
    }
  }
}

void CMTask::push(oop obj) {
  HeapWord* objAddr = (HeapWord*) obj;
  assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  assert(!_g1h->is_on_master_free_list(
              _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
  assert(!_g1h->is_obj_ill(obj), "invariant");
  assert(_nextMarkBitMap->isMarked(objAddr), "invariant");

  if (_cm->verbose_high())
    gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);

  if (!_task_queue->push(obj)) {
    // The local task queue looks full. We need to push some entries
    // to the global stack.

    if (_cm->verbose_medium())
      gclog_or_tty->print_cr("[%d] task queue overflow, "
                             "moving entries to the global stack",
                             _task_id);
    move_entries_to_global_stack();

    // this should succeed since, even if we overflow the global
    // stack, we should have definitely removed some entries from the
    // local queue. So, there must be space on it.
    bool success = _task_queue->push(obj);
    assert(success, "invariant");
  }

  statsOnly( int tmp_size = _task_queue->size();
             if (tmp_size > _local_max_size)
               _local_max_size = tmp_size;
             ++_local_pushes );
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void CMTask::regular_clock_call() {
  if (has_aborted())
    return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent())
    return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit)
    ++_clock_due_to_scanning;
  if (_refs_reached >= _refs_reached_limit)
    ++_clock_due_to_marking;

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium())
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size)
                   _global_max_size = tmp_size;
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium())
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size)
                 _local_max_size = tmp_size;
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted())
    return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
  size_t target_size;
  if (partially)
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  else
    target_size = 0;

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
                             _task_id, target_size);

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high())
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted())
        ret = false;
      else
        ret = _task_queue->pop_local(obj);
    }

    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted())
    return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially)
    target_size = _cm->partial_mark_stack_size_target();
  else
    target_size = 0;

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] draining global stack, target size %d",
                             _task_id, target_size);

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
                             _task_id, _cm->mark_stack_size());
  }
}

// The SATB queue set makes several assumptions about whether to call
// the par or non-par versions of its methods. This is why some of the
// code below is replicated. We should really get rid of the
// single-threaded version of the code to simplify things.
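//
// One possible shape for such a cleanup (a hypothetical helper, not
// part of the current API) would funnel both variants through a
// single call site, e.g.:
//
//   static bool apply_closure_to_completed_buffer(SATBMarkQueueSet& set,
//                                                 int worker_id) {
//     if (G1CollectedHeap::use_parallel_gc_threads())
//       return set.par_apply_closure_to_completed_buffer(worker_id);
//     else
//       return set.apply_closure_to_completed_buffer();
//   }
//
// which would collapse the two near-identical loops below into one.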
void CMTask::drain_satb_buffers() {
  if (has_aborted())
    return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads())
    satb_mq_set.set_par_closure(_task_id, &oc);
  else
    satb_mq_set.set_closure(&oc);

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads())
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    else
      satb_mq_set.iterate_closure_all_threads();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads())
    satb_mq_set.set_par_closure(_task_id, NULL);
  else
    satb_mq_set.set_closure(NULL);

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

void CMTask::drain_region_stack(BitMapClosure* bc) {
  if (has_aborted())
    return;

  assert(_region_finger == NULL,
         "it should be NULL when we're not scanning a region");

  if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
                             _task_id, _cm->region_stack_size());

    MemRegion mr;

    if (!_aborted_region.is_empty()) {
      mr = _aborted_region;
      _aborted_region = MemRegion();

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
                               _task_id, mr.start(), mr.end());
    } else {
      mr = _cm->region_stack_pop_lock_free();
      // it returns MemRegion() if the pop fails
      statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
    }

    while (mr.start() != NULL) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] we are scanning region "
                               "["PTR_FORMAT", "PTR_FORMAT")",
                               _task_id, mr.start(), mr.end());

      assert(mr.end() <= _cm->finger(),
             "otherwise the region shouldn't be on the stack");
      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
      if (_nextMarkBitMap->iterate(bc, mr)) {
        assert(!has_aborted(),
               "cannot abort the task without aborting the bitmap iteration");

        // We finished iterating over the region without aborting.
        regular_clock_call();
        if (has_aborted())
          mr = MemRegion();
        else {
          mr = _cm->region_stack_pop_lock_free();
          // it returns MemRegion() if the pop fails
          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
        }
      } else {
        assert(has_aborted(), "currently the only way to do so");

        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _region_finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _region_finger to something non-null.
        assert(_region_finger != NULL, "invariant");

        // Make sure that any previously aborted region has been
        // cleared.
        assert(_aborted_region.is_empty(), "aborted region not cleared");

        // The iteration was actually aborted. So now _region_finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        MemRegion newRegion =
          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());

        if (!newRegion.is_empty()) {
          if (_cm->verbose_low()) {
            gclog_or_tty->print_cr("[%d] recording unscanned region "
                                   "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
                                   _task_id,
                                   newRegion.start(), newRegion.end());
          }
          // Now record the part of the region we didn't scan to
          // make sure this task scans it later.
          _aborted_region = newRegion;
        }
        // break from while
        mr = MemRegion();
      }
      _region_finger = NULL;
    }

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
                             _task_id, _cm->region_stack_size());
  }
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
                         _regions_claimed, _region_stack_pops);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms) method is the building block
    of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

    (1) Marking Bitmap. If there are gray objects that appear only
    on the bitmap (this happens either when dealing with an overflow
    or when the initial marking phase has simply marked the roots
    and didn't push them on the stack), then tasks claim heap
    regions whose bitmap they then scan to find gray objects. A
    global finger indicates where the end of the last claimed region
    is. A local finger indicates how far into the region a task has
    scanned. The two fingers are used to determine how to gray an
    object (i.e. whether simply marking it is OK, as it will be
    visited by a task in the future, or whether it also needs to be
    pushed on a stack).

    (2) Local Queue. The task's local queue, which it can access
    reasonably efficiently. Other tasks can steal from it when they
    run out of work. Throughout the marking phase, a task attempts
    to keep its local queue short but not totally empty, so that
    entries are available for stealing by other tasks. Only when
    there is no more work will a task totally drain its local queue.

    (3) Global Mark Stack. This handles local queue overflow. During
    marking, entries are only moved between it and the local queues
    in batches, as access to it requires a mutex and finer-grained
    interaction with it might cause contention. If it overflows,
    then the marking phase should restart and iterate over the
    bitmap to identify gray objects. Throughout the marking phase,
    tasks attempt to keep the global mark stack at a small length
    but not totally empty, so that entries are available for popping
    by other tasks. Only when there is no more work will tasks
    totally drain the global mark stack.

    (4) Global Region Stack. Entries on it correspond to areas of
    the bitmap that need to be scanned since they contain gray
    objects. Pushes on the region stack only happen during
    evacuation pauses and typically correspond to areas covered by
    GC LABs. If it overflows, then the marking phase should restart
    and iterate over the bitmap to identify gray objects. Tasks will
    try to totally drain the region stack as soon as possible.

    (5) SATB Buffer Queue. This is where completed SATB buffers are
    made available. Buffers are regularly removed from this queue
    and scanned for roots, so that the queue doesn't get too
    long. During remark, all completed buffers are processed, as
    well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

    (1) When the marking phase has been aborted (after a Full GC).

    (2) When a global overflow (either on the global stack or the
    region stack) has been triggered. Before the task aborts, it
    will actually sync up with the other tasks to ensure that all
    the marking data structures (local queues, stacks, fingers etc.)
    are re-initialised so that when do_marking_step() completes,
    the marking phase can immediately restart.

    (3) When enough completed SATB buffers are available. The
    do_marking_step() method only tries to drain SATB buffers right
    at the beginning. So, if enough buffers are available, the
    marking step aborts and the SATB buffers are processed at
    the beginning of the next invocation.

    (4) To yield. When we have to yield, we abort and yield
    right at the end of do_marking_step(). This saves us from a lot
    of hassle as, by yielding, we might allow a Full GC. If this
    happens, objects will be compacted underneath our feet, the
    heap might shrink, etc. We save checking for this by just
    aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: it is called when the number of object
    words the marking phase has scanned or the number of references
    the marking phase has visited reaches a given limit. Additional
    invocations of the clock method have been planted in a few other
    strategic places too. The initial reason for the clock method was
    to avoid calling os::elapsedVTime() too regularly, as it is quite
    expensive. So, once it was in place, it was natural to piggy-back
    all the other conditions on it too and not constantly check them
    throughout the code.

*****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_stealing,
                             bool do_termination) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  assert(concurrent() || _cm->region_stack_empty(),
         "the region stack should have been cleared before remark");
  assert(concurrent() || !_cm->has_aborted_regions(),
         "aborted regions should have been cleared before remark");
  assert(_region_finger == NULL,
         "this should be non-null only when a region is being scanned");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
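  // (For example, both threads could load _claimed as false before
  // either store becomes visible; a CAS on _claimed would close that
  // window, but for a debug-only check the plain store below is
  // considered good enough.)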
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate them
  // on this method's stack frame.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  CMOopClosure oop_closure(_g1h, _cm, this);
  set_oop_closure(&oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the region stack or the mark stack overflows
    // during a GC pause and this task, after a yield point,
    // restarts. We have to abort as we need to get into the overflow
    // protocol which happens right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Then totally drain the region stack. We will not look at
  // it again before the next invocation of this method. Entries on
  // the region stack are only added during evacuation pauses, for
  // which we have to yield. When we do, we abort the task anyway so
  // it will look at the region stack again when it restarts.
  bitmap_closure.set_scanning_heap_region(false);
  drain_region_stack(&bitmap_closure);
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      bitmap_closure.set_scanning_heap_region(true);
      if (mr.is_empty() ||
          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low())
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium())
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_aborted_region.is_empty(), "only way to reach here");
      guarantee(_cm->region_stack_empty(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
      guarantee(!_cm->region_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
    } else {
      // Apparently there's more work to do. Let's abort this task. Our
      // caller will restart it and we can hopefully find more things
      // to do.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was allocated on this method's stack frame doesn't
  // escape it by accident.
  set_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted())
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
    }
  } else {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
  }

  _claimed = false;
}

CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _oop_closure(NULL),
    _aborted_region(MemRegion()) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding value macro. Also note that most of the
// macros add the necessary white space (as a prefix) which makes
// them a bit easier to compose.
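//
// For instance (mirroring the header printing in the constructor
// below; the arguments here are just illustrative), a header line is
// built by string-concatenating the macros and supplying one argument
// per value:
//
//   _out->print_cr(G1PPRL_LINE_PREFIX
//                  G1PPRL_TYPE_H_FORMAT
//                  G1PPRL_BYTE_H_FORMAT,
//                  "type", "used");
//
// The leading white space baked into each macro keeps the columns
// separated without extra format characters at the call sites.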
// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX             "###"

#define G1PPRL_ADDR_BASE_FORMAT        " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT             " %-4s"
#define G1PPRL_TYPE_H_FORMAT           " %4s"
#define G1PPRL_BYTE_FORMAT             " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT           " %9s"
#define G1PPRL_DOUBLE_FORMAT           " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT         " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
}

// It takes as a parameter a pointer to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = "";
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes));
  _out->cr();
}
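// Typical usage of the closure above (a sketch of a hypothetical call
// site; the closure is driven by a heap region iteration and prints
// its totals from the destructor):
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// doHeapRegion() always returns false, so the iteration never
// terminates early and every region contributes to the summary.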