/*
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"

//
// CM Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CM bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CM bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CM bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
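// A note on the sizing arithmetic above: the bit map keeps one bit for
// every 2^_shifter heap words, so it needs _bmWordSize >> _shifter bits,
// i.e. (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes of backing
// store; the "+ 1" rounds that up before the reservation is aligned.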
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}


CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
}

void CMRegionStack::push(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}
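// A note on the lock-free pushes above: a thread that loses the
// cmpxchg race on _index simply retries, and a slot is only written
// after its index has been successfully claimed, so no entry is ever
// published twice. On overflow we just set the _overflow flag; the
// callers detect this and restart marking (see the has_overflown() /
// _restart_for_overflow handling further down).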
// Currently we do not call this at all. Normally we would call it
// during the concurrent marking / remark phases but we now call
// the lock-based version instead. But we might want to resurrect this
// code in the future. So, we'll leave it here commented out.
#if 0
MemRegion CMRegionStack::pop() {
  while (true) {
    // Otherwise...
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}
#endif // 0

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in the G1 heap.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overrides G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
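    // For a concrete feel for the arithmetic above (illustrative
    // numbers, not defaults to rely on): with
    // G1MarkingOverheadPercent=10, MaxGCPauseMillis=200 and
    // GCPauseIntervalMillis=1000, the overall concurrent-marking
    // overhead is 200 * 0.10 / 1000 = 0.02 of total CPU time. On an
    // 8-CPU machine cpu_ratio = 0.125, so ceil(0.02 / 0.125) gives one
    // marking thread with a task overhead of 0.02 * 8 = 0.16 and a
    // sleep factor of (1 - 0.16) / 0.16 = 5.25, i.e. the thread sleeps
    // 5.25x as long as it runs.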
#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
                                     (int) parallel_marking_threads(), false, true);
    if (_parallel_workers == NULL)
      vm_exit_during_initialization("Failed necessary allocation.");
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->reset(_nextMarkBitMap);

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
                    _par_cleanup_thread_state);

  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
  FREE_C_HEAP_ARRAY(double, _accum_task_vtime);
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    true /* use_prev_marking */, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

// This closure is used to mark refs into the G1 heap from external
// roots; it is called at the initial-mark checkpoint.
class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}
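// A brief note on the SATB flip above: from this point until remark,
// mutator threads log the pre-write values of reference fields into
// their SATB queues, which is what lets concurrent marking treat the
// object graph as a snapshot taken at this checkpoint.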
// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark  hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,    // activate StrongRootsScope
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,      // do not visit active blobs
                            &older     // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   do not do this, then the following deadlock can occur: one thread
   could be in the barrier sync code, waiting for the other thread to
   also sync up, whereas another one could be trying to yield, while
   also waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible for it to be suspended while a Full GC or an evacuation
   pause occurs. This is actually safe, since entering the sync
   barrier is one of the last things do_marking_step() does, and it
   doesn't manipulate any data structures afterwards.
*/

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}
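// In tri-color terms, grayRoot() only marks the object on the next
// bitmap; objects that lie below the global finger have already been
// passed over by the marking tasks, so they (or the regions covering
// them) must additionally be pushed on a stack to get rescanned --
// which is what the two routines below arrange.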
void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail out
    // because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess:store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(true, false, true);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    JavaThread::satb_mark_queue_set().set_active_all_threads(
                                                 false, /* new active value */
                                                 true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */      true,
                               /* silent */           false,
                               /* use_prev_marking */ false);
    }
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}


#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm),_final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded. It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set. We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
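    // The run-merging in the loop above batches the par_at_put() calls:
    // as long as consecutive live objects touch adjacent cards we keep
    // extending the current [start_card_num, last_card_num] run and only
    // flush it when a gap of more than one card appears. For example,
    // assuming the usual 512-byte cards (card_shift == 9), an object
    // spanning addresses 0x1000-0x17ff covers card nums 8 through 11,
    // and a following object starting at 0x1800 (card num 12) simply
    // extends the same run.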
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};


void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 final counting"), _g1h(g1h),
    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  {
    if (ParallelGCThreads > 0)
      _n_workers = _g1h->workers()->total_workers();
    else
      _n_workers = 1;
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};
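// A note on the claim values used by the tasks below: when regions are
// iterated in parallel via heap_region_par_iterate_chunked(), each
// worker claims regions by atomically bumping a region's claim value
// to the value for the current phase (FinalCountClaimValue,
// NoteEndClaimValue, ScrubRemSetClaimValue), which guarantees that
// each region is processed exactly once per phase; the
// check_heap_region_claim_values() asserts in cleanup() verify this.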
1556 _g1h->finish_free_region_work(g1_note_end.freed_bytes(), 1557 g1_note_end.cleared_h_regions(), 1558 0, NULL); 1559 { 1560 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1561 _max_live_bytes += g1_note_end.max_live_bytes(); 1562 _freed_bytes += g1_note_end.freed_bytes(); 1563 } 1564 double end = os::elapsedTime(); 1565 if (G1PrintParCleanupStats) { 1566 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] " 1567 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n", 1568 i, start, end, (end-start)*1000.0, 1569 g1_note_end.regions_claimed(), 1570 g1_note_end.claimed_region_time_sec()*1000.0, 1571 g1_note_end.max_region_time_sec()*1000.0); 1572 } 1573 } 1574 size_t max_live_bytes() { return _max_live_bytes; } 1575 size_t freed_bytes() { return _freed_bytes; } 1576 }; 1577 1578 class G1ParScrubRemSetTask: public AbstractGangTask { 1579 protected: 1580 G1RemSet* _g1rs; 1581 BitMap* _region_bm; 1582 BitMap* _card_bm; 1583 public: 1584 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1585 BitMap* region_bm, BitMap* card_bm) : 1586 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1587 _region_bm(region_bm), _card_bm(card_bm) 1588 {} 1589 1590 void work(int i) { 1591 if (ParallelGCThreads > 0) { 1592 _g1rs->scrub_par(_region_bm, _card_bm, i, 1593 HeapRegion::ScrubRemSetClaimValue); 1594 } else { 1595 _g1rs->scrub(_region_bm, _card_bm); 1596 } 1597 } 1598 1599 }; 1600 1601 G1NoteEndOfConcMarkClosure:: 1602 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1603 UncleanRegionList* list, 1604 int worker_num) 1605 : _g1(g1), _worker_num(worker_num), 1606 _max_live_bytes(0), _regions_claimed(0), 1607 _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0), 1608 _claimed_region_time(0.0), _max_region_time(0.0), 1609 _unclean_region_list(list) 1610 {} 1611 1612 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) { 1613 // We use a claim value of zero here because all regions 1614 // were claimed with value 1 in the FinalCount task. 1615 r->reset_gc_time_stamp(); 1616 if (!r->continuesHumongous()) { 1617 double start = os::elapsedTime(); 1618 _regions_claimed++; 1619 r->note_end_of_marking(); 1620 _max_live_bytes += r->max_live_bytes(); 1621 _g1->free_region_if_totally_empty_work(r, 1622 _freed_bytes, 1623 _cleared_h_regions, 1624 _freed_regions, 1625 _unclean_region_list, 1626 true /*par*/); 1627 double region_time = (os::elapsedTime() - start); 1628 _claimed_region_time += region_time; 1629 if (region_time > _max_region_time) _max_region_time = region_time; 1630 } 1631 return false; 1632 } 1633 1634 void ConcurrentMark::cleanup() { 1635 // world is stopped at this checkpoint 1636 assert(SafepointSynchronize::is_at_safepoint(), 1637 "world should be stopped"); 1638 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1639 1640 // If a full collection has happened, we shouldn't do this. 1641 if (has_aborted()) { 1642 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1643 return; 1644 } 1645 1646 if (VerifyDuringGC) { 1647 HandleMark hm; // handle scope 1648 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1649 Universe::heap()->prepare_for_verify(); 1650 Universe::verify(/* allow dirty */ true, 1651 /* silent */ false, 1652 /* prev marking */ true); 1653 } 1654 1655 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1656 g1p->record_concurrent_mark_cleanup_start(); 1657 1658 double start = os::elapsedTime(); 1659 1660 // Do counting once more with the world stopped for good measure. 
1661 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1662 &_region_bm, &_card_bm);
1663 if (ParallelGCThreads > 0) {
1664 assert(g1h->check_heap_region_claim_values(
1665 HeapRegion::InitialClaimValue),
1666 "sanity check");
1667
1668 int n_workers = g1h->workers()->total_workers();
1669 g1h->set_par_threads(n_workers);
1670 g1h->workers()->run_task(&g1_par_count_task);
1671 g1h->set_par_threads(0);
1672
1673 assert(g1h->check_heap_region_claim_values(
1674 HeapRegion::FinalCountClaimValue),
1675 "sanity check");
1676 } else {
1677 g1_par_count_task.work(0);
1678 }
1679
1680 size_t known_garbage_bytes =
1681 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1682 #if 0
1683 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1684 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1685 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1686 (double) known_garbage_bytes / (double) (1024 * 1024));
1687 #endif // 0
1688 g1p->set_known_garbage_bytes(known_garbage_bytes);
1689
1690 size_t start_used_bytes = g1h->used();
1691 _at_least_one_mark_complete = true;
1692 g1h->set_marking_complete();
1693
1694 double count_end = os::elapsedTime();
1695 double this_final_counting_time = (count_end - start);
1696 if (G1PrintParCleanupStats) {
1697 gclog_or_tty->print_cr("Cleanup:");
1698 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1699 this_final_counting_time*1000.0);
1700 }
1701 _total_counting_time += this_final_counting_time;
1702
1703 // Install the newly created mark bitmap as "prev".
1704 swapMarkBitMaps();
1705
1706 g1h->reset_gc_time_stamp();
1707
1708 // Note end of marking in all heap regions.
1709 double note_end_start = os::elapsedTime();
1710 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1711 if (ParallelGCThreads > 0) {
1712 int n_workers = g1h->workers()->total_workers();
1713 g1h->set_par_threads(n_workers);
1714 g1h->workers()->run_task(&g1_par_note_end_task);
1715 g1h->set_par_threads(0);
1716
1717 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1718 "sanity check");
1719 } else {
1720 g1_par_note_end_task.work(0);
1721 }
1722 g1h->set_unclean_regions_coming(true);
1723 double note_end_end = os::elapsedTime();
1724 // Tell the mutators that there might be unclean regions coming...
1725 if (G1PrintParCleanupStats) {
1726 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1727 (note_end_end - note_end_start)*1000.0);
1728 }
1729
1730 // Now scrub the remembered sets. Note that we must do this before the
1731 // call below, since it affects the metric by which we sort the heap
1732 // regions.
1733 if (G1ScrubRemSets) {
1734 double rs_scrub_start = os::elapsedTime();
1735 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1736 if (ParallelGCThreads > 0) {
1737 int n_workers = g1h->workers()->total_workers();
1738 g1h->set_par_threads(n_workers);
1739 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1740 g1h->set_par_threads(0);
1741
1742 assert(g1h->check_heap_region_claim_values(
1743 HeapRegion::ScrubRemSetClaimValue),
1744 "sanity check");
1745 } else {
1746 g1_par_scrub_rs_task.work(0);
1747 }
1748
1749 double rs_scrub_end = os::elapsedTime();
1750 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1751 _total_rs_scrub_time += this_rs_scrub_time;
1752 }
1753
1754 // this will also free any regions totally full of garbage objects,
1755 // and sort the regions.
1756 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1757 g1_par_note_end_task.freed_bytes(),
1758 g1_par_note_end_task.max_live_bytes());
1759
1760 // Statistics.
1761 double end = os::elapsedTime();
1762 _cleanup_times.add((end - start) * 1000.0);
1763
1764 // G1CollectedHeap::heap()->print();
1765 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1766 // G1CollectedHeap::heap()->get_gc_time_stamp());
1767
1768 if (PrintGC || PrintGCDetails) {
1769 g1h->print_size_transition(gclog_or_tty,
1770 start_used_bytes,
1771 g1h->used(),
1772 g1h->capacity());
1773 }
1774
1775 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1776 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1777
1778 // We need to make this be a "collection" so any collection pause that
1779 // races with it goes around and waits for completeCleanup to finish.
1780 g1h->increment_total_collections();
1781
1782 if (VerifyDuringGC) {
1783 HandleMark hm; // handle scope
1784 gclog_or_tty->print(" VerifyDuringGC:(after)");
1785 Universe::heap()->prepare_for_verify();
1786 Universe::verify(/* allow dirty */ true,
1787 /* silent */ false,
1788 /* prev marking */ true);
1789 }
1790 }
1791
1792 void ConcurrentMark::completeCleanup() {
1793 // A full collection intervened.
1794 if (has_aborted()) return;
1795
1796 int first = 0;
1797 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1798 for (int t = 0; t < last; t++) {
1799 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1800 assert(list->well_formed(), "Inv");
1801 HeapRegion* hd = list->hd();
1802 while (hd != NULL) {
1803 // Clear the region's remembered set and move it to the unclean list.
1804 hd->rem_set()->clear();
1805 HeapRegion* next_hd = hd->next_from_unclean_list();
1806 (void)list->pop();
1807 assert(list->hd() == next_hd, "how not?");
1808 _g1h->put_region_on_unclean_list(hd);
1809 if (!hd->isHumongous()) {
1810 // Bump the _free_regions count by one.
1811 _g1h->finish_free_region_work(0, 0, 1, NULL); 1812 } 1813 hd = list->hd(); 1814 assert(hd == next_hd, "how not?"); 1815 } 1816 } 1817 } 1818 1819 1820 class G1CMIsAliveClosure: public BoolObjectClosure { 1821 G1CollectedHeap* _g1; 1822 public: 1823 G1CMIsAliveClosure(G1CollectedHeap* g1) : 1824 _g1(g1) 1825 {} 1826 1827 void do_object(oop obj) { 1828 assert(false, "not to be invoked"); 1829 } 1830 bool do_object_b(oop obj) { 1831 HeapWord* addr = (HeapWord*)obj; 1832 return addr != NULL && 1833 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 1834 } 1835 }; 1836 1837 class G1CMKeepAliveClosure: public OopClosure { 1838 G1CollectedHeap* _g1; 1839 ConcurrentMark* _cm; 1840 CMBitMap* _bitMap; 1841 public: 1842 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm, 1843 CMBitMap* bitMap) : 1844 _g1(g1), _cm(cm), 1845 _bitMap(bitMap) {} 1846 1847 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1848 virtual void do_oop( oop* p) { do_oop_work(p); } 1849 1850 template <class T> void do_oop_work(T* p) { 1851 oop thisOop = oopDesc::load_decode_heap_oop(p); 1852 HeapWord* addr = (HeapWord*)thisOop; 1853 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) { 1854 _bitMap->mark(addr); 1855 _cm->mark_stack_push(thisOop); 1856 } 1857 } 1858 }; 1859 1860 class G1CMDrainMarkingStackClosure: public VoidClosure { 1861 CMMarkStack* _markStack; 1862 CMBitMap* _bitMap; 1863 G1CMKeepAliveClosure* _oopClosure; 1864 public: 1865 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack, 1866 G1CMKeepAliveClosure* oopClosure) : 1867 _bitMap(bitMap), 1868 _markStack(markStack), 1869 _oopClosure(oopClosure) 1870 {} 1871 1872 void do_void() { 1873 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false); 1874 } 1875 }; 1876 1877 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 1878 ResourceMark rm; 1879 HandleMark hm; 1880 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1881 ReferenceProcessor* rp = g1h->ref_processor(); 1882 1883 // Process weak references. 1884 rp->setup_policy(clear_all_soft_refs); 1885 assert(_markStack.isEmpty(), "mark stack should be empty"); 1886 1887 G1CMIsAliveClosure g1IsAliveClosure (g1h); 1888 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap()); 1889 G1CMDrainMarkingStackClosure 1890 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack, 1891 &g1KeepAliveClosure); 1892 1893 // XXXYYY Also: copy the parallel ref processing code from CMS. 
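// The three closures created above split the reference-processing work:
// g1IsAliveClosure decides whether a referent is still reachable,
// g1KeepAliveClosure marks (and pushes) referents that must be kept
// alive, and g1DrainMarkingStackClosure drains the global mark stack so
// that everything newly kept alive is traced transitively.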
1894 rp->process_discovered_references(&g1IsAliveClosure,
1895 &g1KeepAliveClosure,
1896 &g1DrainMarkingStackClosure,
1897 NULL);
1898 assert(_markStack.overflow() || _markStack.isEmpty(),
1899 "mark stack should be empty (unless it overflowed)");
1900 if (_markStack.overflow()) {
1901 set_has_overflown();
1902 }
1903
1904 rp->enqueue_discovered_references();
1905 rp->verify_no_references_recorded();
1906 assert(!rp->discovery_enabled(), "should have been disabled");
1907
1908 // Now clean up stale oops in SymbolTable and StringTable
1909 SymbolTable::unlink(&g1IsAliveClosure);
1910 StringTable::unlink(&g1IsAliveClosure);
1911 }
1912
1913 void ConcurrentMark::swapMarkBitMaps() {
1914 CMBitMapRO* temp = _prevMarkBitMap;
1915 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1916 _nextMarkBitMap = (CMBitMap*) temp;
1917 }
1918
1919 class CMRemarkTask: public AbstractGangTask {
1920 private:
1921 ConcurrentMark *_cm;
1922
1923 public:
1924 void work(int worker_i) {
1925 // Since all available tasks are actually started, we should
1926 // only proceed if we're supposed to be active.
1927 if ((size_t)worker_i < _cm->active_tasks()) {
1928 CMTask* task = _cm->task(worker_i);
1929 task->record_start_time();
1930 do {
1931 task->do_marking_step(1000000000.0 /* something very large */);
1932 } while (task->has_aborted() && !_cm->has_overflown());
1933 // If we overflow, then we do not want to restart. We instead
1934 // want to abort remark and do concurrent marking again.
1935 task->record_end_time();
1936 }
1937 }
1938
1939 CMRemarkTask(ConcurrentMark* cm) :
1940 AbstractGangTask("Par Remark"), _cm(cm) { }
1941 };
1942
1943 void ConcurrentMark::checkpointRootsFinalWork() {
1944 ResourceMark rm;
1945 HandleMark hm;
1946 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1947
1948 g1h->ensure_parsability(false);
1949
1950 if (ParallelGCThreads > 0) {
1951 G1CollectedHeap::StrongRootsScope srs(g1h);
1952 // this is remark, so we'll use up all available threads
1953 int active_workers = ParallelGCThreads;
1954 set_phase(active_workers, false);
1955
1956 CMRemarkTask remarkTask(this);
1957 // We will start all available threads, even if we decide that the
1958 // active_workers will be fewer. The extra ones will just bail out
1959 // immediately.
1960 int n_workers = g1h->workers()->total_workers();
1961 g1h->set_par_threads(n_workers);
1962 g1h->workers()->run_task(&remarkTask);
1963 g1h->set_par_threads(0);
1964 } else {
1965 G1CollectedHeap::StrongRootsScope srs(g1h);
1966 // this is remark, so we'll use up all available threads
1967 int active_workers = 1;
1968 set_phase(active_workers, false);
1969
1970 CMRemarkTask remarkTask(this);
1971 // We will start all available threads, even if we decide that the
1972 // active_workers will be fewer. The extra ones will just bail out
1973 // immediately.
1974 remarkTask.work(0); 1975 } 1976 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1977 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 1978 1979 print_stats(); 1980 1981 if (!restart_for_overflow()) 1982 set_non_marking_state(); 1983 1984 #if VERIFY_OBJS_PROCESSED 1985 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 1986 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 1987 _scan_obj_cl.objs_processed, 1988 ThreadLocalObjQueue::objs_enqueued); 1989 guarantee(_scan_obj_cl.objs_processed == 1990 ThreadLocalObjQueue::objs_enqueued, 1991 "Different number of objs processed and enqueued."); 1992 } 1993 #endif 1994 } 1995 1996 #ifndef PRODUCT 1997 1998 class PrintReachableOopClosure: public OopClosure { 1999 private: 2000 G1CollectedHeap* _g1h; 2001 CMBitMapRO* _bitmap; 2002 outputStream* _out; 2003 bool _use_prev_marking; 2004 bool _all; 2005 2006 public: 2007 PrintReachableOopClosure(CMBitMapRO* bitmap, 2008 outputStream* out, 2009 bool use_prev_marking, 2010 bool all) : 2011 _g1h(G1CollectedHeap::heap()), 2012 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { } 2013 2014 void do_oop(narrowOop* p) { do_oop_work(p); } 2015 void do_oop( oop* p) { do_oop_work(p); } 2016 2017 template <class T> void do_oop_work(T* p) { 2018 oop obj = oopDesc::load_decode_heap_oop(p); 2019 const char* str = NULL; 2020 const char* str2 = ""; 2021 2022 if (obj == NULL) { 2023 str = ""; 2024 } else if (!_g1h->is_in_g1_reserved(obj)) { 2025 str = " O"; 2026 } else { 2027 HeapRegion* hr = _g1h->heap_region_containing(obj); 2028 guarantee(hr != NULL, "invariant"); 2029 bool over_tams = false; 2030 if (_use_prev_marking) { 2031 over_tams = hr->obj_allocated_since_prev_marking(obj); 2032 } else { 2033 over_tams = hr->obj_allocated_since_next_marking(obj); 2034 } 2035 bool marked = _bitmap->isMarked((HeapWord*) obj); 2036 2037 if (over_tams) { 2038 str = " >"; 2039 if (marked) { 2040 str2 = " AND MARKED"; 2041 } 2042 } else if (marked) { 2043 str = " M"; 2044 } else { 2045 str = " NOT"; 2046 } 2047 } 2048 2049 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2050 p, (void*) obj, str, str2); 2051 } 2052 }; 2053 2054 class PrintReachableObjectClosure : public ObjectClosure { 2055 private: 2056 CMBitMapRO* _bitmap; 2057 outputStream* _out; 2058 bool _use_prev_marking; 2059 bool _all; 2060 HeapRegion* _hr; 2061 2062 public: 2063 PrintReachableObjectClosure(CMBitMapRO* bitmap, 2064 outputStream* out, 2065 bool use_prev_marking, 2066 bool all, 2067 HeapRegion* hr) : 2068 _bitmap(bitmap), _out(out), 2069 _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { } 2070 2071 void do_object(oop o) { 2072 bool over_tams; 2073 if (_use_prev_marking) { 2074 over_tams = _hr->obj_allocated_since_prev_marking(o); 2075 } else { 2076 over_tams = _hr->obj_allocated_since_next_marking(o); 2077 } 2078 bool marked = _bitmap->isMarked((HeapWord*) o); 2079 bool print_it = _all || over_tams || marked; 2080 2081 if (print_it) { 2082 _out->print_cr(" "PTR_FORMAT"%s", 2083 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2084 PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all); 2085 o->oop_iterate(&oopCl); 2086 } 2087 } 2088 }; 2089 2090 class PrintReachableRegionClosure : public HeapRegionClosure { 2091 private: 2092 CMBitMapRO* _bitmap; 2093 outputStream* _out; 2094 bool _use_prev_marking; 2095 bool _all; 2096 2097 public: 2098 bool doHeapRegion(HeapRegion* hr) { 2099 HeapWord* b = hr->bottom(); 2100 HeapWord* e = hr->end(); 2101 HeapWord* t = hr->top(); 2102 HeapWord* p = NULL; 2103 if (_use_prev_marking) { 2104 p = hr->prev_top_at_mark_start(); 2105 } else { 2106 p = hr->next_top_at_mark_start(); 2107 } 2108 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2109 "TAMS: "PTR_FORMAT, b, e, t, p); 2110 _out->cr(); 2111 2112 HeapWord* from = b; 2113 HeapWord* to = t; 2114 2115 if (to > from) { 2116 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2117 _out->cr(); 2118 PrintReachableObjectClosure ocl(_bitmap, _out, 2119 _use_prev_marking, _all, hr); 2120 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2121 _out->cr(); 2122 } 2123 2124 return false; 2125 } 2126 2127 PrintReachableRegionClosure(CMBitMapRO* bitmap, 2128 outputStream* out, 2129 bool use_prev_marking, 2130 bool all) : 2131 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { } 2132 }; 2133 2134 void ConcurrentMark::print_reachable(const char* str, 2135 bool use_prev_marking, 2136 bool all) { 2137 gclog_or_tty->cr(); 2138 gclog_or_tty->print_cr("== Doing heap dump... "); 2139 2140 if (G1PrintReachableBaseFile == NULL) { 2141 gclog_or_tty->print_cr(" #### error: no base file defined"); 2142 return; 2143 } 2144 2145 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2146 (JVM_MAXPATHLEN - 1)) { 2147 gclog_or_tty->print_cr(" #### error: file name too long"); 2148 return; 2149 } 2150 2151 char file_name[JVM_MAXPATHLEN]; 2152 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2153 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2154 2155 fileStream fout(file_name); 2156 if (!fout.is_open()) { 2157 gclog_or_tty->print_cr(" #### error: could not open file"); 2158 return; 2159 } 2160 2161 outputStream* out = &fout; 2162 2163 CMBitMapRO* bitmap = NULL; 2164 if (use_prev_marking) { 2165 bitmap = _prevMarkBitMap; 2166 } else { 2167 bitmap = _nextMarkBitMap; 2168 } 2169 2170 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS"); 2171 out->cr(); 2172 2173 out->print_cr("--- ITERATING OVER REGIONS"); 2174 out->cr(); 2175 PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all); 2176 _g1h->heap_region_iterate(&rcl); 2177 out->cr(); 2178 2179 gclog_or_tty->print_cr(" done"); 2180 gclog_or_tty->flush(); 2181 } 2182 2183 #endif // PRODUCT 2184 2185 // This note is for drainAllSATBBuffers and the code in between. 2186 // In the future we could reuse a task to do this work during an 2187 // evacuation pause (since now tasks are not active and can be claimed 2188 // during an evacuation pause). This was a late change to the code and 2189 // is currently not being taken advantage of. 
2190
2191 class CMGlobalObjectClosure : public ObjectClosure {
2192 private:
2193 ConcurrentMark* _cm;
2194
2195 public:
2196 void do_object(oop obj) {
2197 _cm->deal_with_reference(obj);
2198 }
2199
2200 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2201 };
2202
2203 void ConcurrentMark::deal_with_reference(oop obj) {
2204 if (verbose_high())
2205 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2206 (void*) obj);
2207
2208
2209 HeapWord* objAddr = (HeapWord*) obj;
2210 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2211 if (_g1h->is_in_g1_reserved(objAddr)) {
2212 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2213 HeapRegion* hr = _g1h->heap_region_containing(obj);
2214 if (_g1h->is_obj_ill(obj, hr)) {
2215 if (verbose_high())
2216 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2217 "marked", (void*) obj);
2218
2219 // we need to mark it first
2220 if (_nextMarkBitMap->parMark(objAddr)) {
2221 // No OrderAccess::store_load() is needed. It is implicit in the
2222 // CAS done in parMark(objAddr) above
2223 HeapWord* finger = _finger;
2224 if (objAddr < finger) {
2225 if (verbose_high())
2226 gclog_or_tty->print_cr("[global] below the global finger "
2227 "("PTR_FORMAT"), pushing it", finger);
2228 if (!mark_stack_push(obj)) {
2229 if (verbose_low())
2230 gclog_or_tty->print_cr("[global] global stack overflow during "
2231 "deal_with_reference");
2232 }
2233 }
2234 }
2235 }
2236 }
2237 }
2238
2239 void ConcurrentMark::drainAllSATBBuffers() {
2240 CMGlobalObjectClosure oc(this);
2241 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2242 satb_mq_set.set_closure(&oc);
2243
2244 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2245 if (verbose_medium())
2246 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2247 }
2248
2249 // no need to check whether we should do this, as this is only
2250 // called during an evacuation pause
2251 satb_mq_set.iterate_closure_all_threads();
2252
2253 satb_mq_set.set_closure(NULL);
2254 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2255 }
2256
2257 void ConcurrentMark::markPrev(oop p) {
2258 // Note we are overriding the read-only view of the prev map here, via
2259 // the cast.
2260 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2261 }
2262
2263 void ConcurrentMark::clear(oop p) {
2264 assert(p != NULL && p->is_oop(), "expected an oop");
2265 HeapWord* addr = (HeapWord*)p;
2266 assert(addr >= _nextMarkBitMap->startWord() &&
2267 addr < _nextMarkBitMap->endWord(), "in a region");
2268
2269 _nextMarkBitMap->clear(addr);
2270 }
2271
2272 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2273 // Note we are overriding the read-only view of the prev map here, via
2274 // the cast.
2275 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2276 _nextMarkBitMap->clearRange(mr);
2277 }
2278
2279 HeapRegion*
2280 ConcurrentMark::claim_region(int task_num) {
2281 // "checkpoint" the finger
2282 HeapWord* finger = _finger;
2283
2284 // _heap_end will not change underneath our feet; it only changes at
2285 // yield points.
2286 while (finger < _heap_end) {
2287 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2288
2289 // is the gap between reading the finger and doing the CAS too long?
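// Even if it is, the race is benign: the cmpxchg below only succeeds
// when _finger still holds the value we read above, so a stale read
// merely costs us another iteration with the finger's new value.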
2290
2291 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2292 HeapWord* bottom = curr_region->bottom();
2293 HeapWord* end = curr_region->end();
2294 HeapWord* limit = curr_region->next_top_at_mark_start();
2295
2296 if (verbose_low())
2297 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2298 "["PTR_FORMAT", "PTR_FORMAT"), "
2299 "limit = "PTR_FORMAT,
2300 task_num, curr_region, bottom, end, limit);
2301
2302 HeapWord* res =
2303 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2304 if (res == finger) {
2305 // we succeeded
2306
2307 // notice that _finger == end cannot be guaranteed here since
2308 // someone else might have moved the finger even further
2309 assert(_finger >= end, "the finger should have moved forward");
2310
2311 if (verbose_low())
2312 gclog_or_tty->print_cr("[%d] we were successful with region = "
2313 PTR_FORMAT, task_num, curr_region);
2314
2315 if (limit > bottom) {
2316 if (verbose_low())
2317 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2318 "returning it ", task_num, curr_region);
2319 return curr_region;
2320 } else {
2321 assert(limit == bottom,
2322 "the region limit should be at bottom");
2323 if (verbose_low())
2324 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2325 "returning NULL", task_num, curr_region);
2326 // we return NULL and the caller should try calling
2327 // claim_region() again.
2328 return NULL;
2329 }
2330 } else {
2331 assert(_finger > finger, "the finger should have moved forward");
2332 if (verbose_low())
2333 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2334 "global finger = "PTR_FORMAT", "
2335 "our finger = "PTR_FORMAT,
2336 task_num, _finger, finger);
2337
2338 // read it again
2339 finger = _finger;
2340 }
2341 }
2342
2343 return NULL;
2344 }
2345
2346 void ConcurrentMark::oops_do(OopClosure* cl) {
2347 if (_markStack.size() > 0 && verbose_low())
2348 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2349 "size = %d", _markStack.size());
2350 // we first iterate over the contents of the mark stack...
2351 _markStack.oops_do(cl);
2352
2353 for (int i = 0; i < (int)_max_task_num; ++i) {
2354 OopTaskQueue* queue = _task_queues->queue((int)i);
2355
2356 if (queue->size() > 0 && verbose_low())
2357 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2358 "size = %d", i, queue->size());
2359
2360 // ...then over the contents of all the task queues.
2361 queue->oops_do(cl);
2362 }
2363
2364 // finally, invalidate any entries in the region stack that
2365 // point into the collection set
2366 if (_regionStack.invalidate_entries_into_cset()) {
2367 // otherwise, any gray objects copied during the evacuation pause
2368 // might not be visited.
2369 assert(_should_gray_objects, "invariant"); 2370 } 2371 } 2372 2373 void ConcurrentMark::clear_marking_state() { 2374 _markStack.setEmpty(); 2375 _markStack.clear_overflow(); 2376 _regionStack.setEmpty(); 2377 _regionStack.clear_overflow(); 2378 clear_has_overflown(); 2379 _finger = _heap_start; 2380 2381 for (int i = 0; i < (int)_max_task_num; ++i) { 2382 OopTaskQueue* queue = _task_queues->queue(i); 2383 queue->set_empty(); 2384 } 2385 } 2386 2387 void ConcurrentMark::print_stats() { 2388 if (verbose_stats()) { 2389 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2390 for (size_t i = 0; i < _active_tasks; ++i) { 2391 _tasks[i]->print_stats(); 2392 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2393 } 2394 } 2395 } 2396 2397 class CSMarkOopClosure: public OopClosure { 2398 friend class CSMarkBitMapClosure; 2399 2400 G1CollectedHeap* _g1h; 2401 CMBitMap* _bm; 2402 ConcurrentMark* _cm; 2403 oop* _ms; 2404 jint* _array_ind_stack; 2405 int _ms_size; 2406 int _ms_ind; 2407 int _array_increment; 2408 2409 bool push(oop obj, int arr_ind = 0) { 2410 if (_ms_ind == _ms_size) { 2411 gclog_or_tty->print_cr("Mark stack is full."); 2412 return false; 2413 } 2414 _ms[_ms_ind] = obj; 2415 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind; 2416 _ms_ind++; 2417 return true; 2418 } 2419 2420 oop pop() { 2421 if (_ms_ind == 0) return NULL; 2422 else { 2423 _ms_ind--; 2424 return _ms[_ms_ind]; 2425 } 2426 } 2427 2428 template <class T> bool drain() { 2429 while (_ms_ind > 0) { 2430 oop obj = pop(); 2431 assert(obj != NULL, "Since index was non-zero."); 2432 if (obj->is_objArray()) { 2433 jint arr_ind = _array_ind_stack[_ms_ind]; 2434 objArrayOop aobj = objArrayOop(obj); 2435 jint len = aobj->length(); 2436 jint next_arr_ind = arr_ind + _array_increment; 2437 if (next_arr_ind < len) { 2438 push(obj, next_arr_ind); 2439 } 2440 // Now process this portion of this one. 2441 int lim = MIN2(next_arr_ind, len); 2442 for (int j = arr_ind; j < lim; j++) { 2443 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j)); 2444 } 2445 2446 } else { 2447 obj->oop_iterate(this); 2448 } 2449 if (abort()) return false; 2450 } 2451 return true; 2452 } 2453 2454 public: 2455 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : 2456 _g1h(G1CollectedHeap::heap()), 2457 _cm(cm), 2458 _bm(cm->nextMarkBitMap()), 2459 _ms_size(ms_size), _ms_ind(0), 2460 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), 2461 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), 2462 _array_increment(MAX2(ms_size/8, 16)) 2463 {} 2464 2465 ~CSMarkOopClosure() { 2466 FREE_C_HEAP_ARRAY(oop, _ms); 2467 FREE_C_HEAP_ARRAY(jint, _array_ind_stack); 2468 } 2469 2470 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2471 virtual void do_oop( oop* p) { do_oop_work(p); } 2472 2473 template <class T> void do_oop_work(T* p) { 2474 T heap_oop = oopDesc::load_heap_oop(p); 2475 if (oopDesc::is_null(heap_oop)) return; 2476 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); 2477 if (obj->is_forwarded()) { 2478 // If the object has already been forwarded, we have to make sure 2479 // that it's marked. So follow the forwarding pointer. Note that 2480 // this does the right thing for self-forwarding pointers in the 2481 // evacuation failure case. 
2482 obj = obj->forwardee();
2483 }
2484 HeapRegion* hr = _g1h->heap_region_containing(obj);
2485 if (hr != NULL) {
2486 if (hr->in_collection_set()) {
2487 if (_g1h->is_obj_ill(obj)) {
2488 _bm->mark((HeapWord*)obj);
2489 if (!push(obj)) {
2490 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2491 set_abort();
2492 }
2493 }
2494 } else {
2495 // Outside the collection set; we need to gray it
2496 _cm->deal_with_reference(obj);
2497 }
2498 }
2499 }
2500 };
2501
2502 class CSMarkBitMapClosure: public BitMapClosure {
2503 G1CollectedHeap* _g1h;
2504 CMBitMap* _bitMap;
2505 ConcurrentMark* _cm;
2506 CSMarkOopClosure _oop_cl;
2507 public:
2508 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2509 _g1h(G1CollectedHeap::heap()),
2510 _bitMap(cm->nextMarkBitMap()),
2511 _oop_cl(cm, ms_size)
2512 {}
2513
2514 ~CSMarkBitMapClosure() {}
2515
2516 bool do_bit(size_t offset) {
2517 // convert offset into a HeapWord*
2518 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2519 assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
2520 "address out of range");
2521 assert(_bitMap->isMarked(addr), "tautology");
2522 oop obj = oop(addr);
2523 if (!obj->is_forwarded()) {
2524 if (!_oop_cl.push(obj)) return false;
2525 if (UseCompressedOops) {
2526 if (!_oop_cl.drain<narrowOop>()) return false;
2527 } else {
2528 if (!_oop_cl.drain<oop>()) return false;
2529 }
2530 }
2531 // Otherwise...
2532 return true;
2533 }
2534 };
2535
2536
2537 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2538 CMBitMap* _bm;
2539 CSMarkBitMapClosure _bit_cl;
2540 enum SomePrivateConstants {
2541 MSSize = 1000
2542 };
2543 bool _completed;
2544 public:
2545 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2546 _bm(cm->nextMarkBitMap()),
2547 _bit_cl(cm, MSSize),
2548 _completed(true)
2549 {}
2550
2551 ~CompleteMarkingInCSHRClosure() {}
2552
2553 bool doHeapRegion(HeapRegion* r) {
2554 if (!r->evacuation_failed()) {
2555 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2556 if (!mr.is_empty()) {
2557 if (!_bm->iterate(&_bit_cl, mr)) {
2558 _completed = false;
2559 return true;
2560 }
2561 }
2562 }
2563 return false;
2564 }
2565
2566 bool completed() { return _completed; }
2567 };
2568
2569 class ClearMarksInHRClosure: public HeapRegionClosure {
2570 CMBitMap* _bm;
2571 public:
2572 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2573
2574 bool doHeapRegion(HeapRegion* r) {
2575 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2576 MemRegion usedMR = r->used_region();
2577 _bm->clearRange(usedMR);
2578 }
2579 return false;
2580 }
2581 };
2582
2583 void ConcurrentMark::complete_marking_in_collection_set() {
2584 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2585
2586 if (!g1h->mark_in_progress()) {
2587 g1h->g1_policy()->record_mark_closure_time(0.0);
2588 return;
2589 }
2590
2591 int i = 1;
2592 double start = os::elapsedTime();
2593 while (true) {
2594 i++;
2595 CompleteMarkingInCSHRClosure cmplt(this);
2596 g1h->collection_set_iterate(&cmplt);
2597 if (cmplt.completed()) break;
2598 }
2599 double end_time = os::elapsedTime();
2600 double elapsed_time_ms = (end_time - start) * 1000.0;
2601 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2602
2603 ClearMarksInHRClosure clr(nextMarkBitMap());
2604 g1h->collection_set_iterate(&clr);
2605 }
2606
2607 // The next two methods deal with the following optimisation. Some
2608 // objects are gray by being marked and located above the finger. If
2609 // they are copied, during an evacuation pause, below the finger then
2610 // they need to be pushed on the stack. The observation is that, if
2611 // there are no regions in the collection set located above the
2612 // finger, then the above cannot happen, hence we do not need to
2613 // explicitly gray any objects when copying them to below the
2614 // finger. The global stack will be scanned to ensure that, if it
2615 // points to objects being copied, it will update their
2616 // location. There is a tricky situation with the gray objects in
2617 // the region stack that are being copied, however. See the comment in
2618 // newCSet().
2619
2620 void ConcurrentMark::newCSet() {
2621 if (!concurrent_marking_in_progress())
2622 // nothing to do if marking is not in progress
2623 return;
2624
2625 // find what the lowest finger is among the global and local fingers
2626 _min_finger = _finger;
2627 for (int i = 0; i < (int)_max_task_num; ++i) {
2628 CMTask* task = _tasks[i];
2629 HeapWord* task_finger = task->finger();
2630 if (task_finger != NULL && task_finger < _min_finger)
2631 _min_finger = task_finger;
2632 }
2633
2634 _should_gray_objects = false;
2635
2636 // This fixes a very subtle and frustrating bug. It might be the case
2637 // that, during an evacuation pause, heap regions that contain
2638 // objects that are gray (by being in regions contained in the
2639 // region stack) are included in the collection set. Since such gray
2640 // objects will be moved, and because it's not easy to redirect
2641 // region stack entries to point to a new location (because objects
2642 // in one region might be scattered to multiple regions after they
2643 // are copied), one option is to ensure that all marked objects
2644 // copied during a pause are pushed on the stack. Notice, however,
2645 // that this problem can only happen when the region stack is not
2646 // empty during an evacuation pause. So, we make the fix a bit less
2647 // conservative and ensure that regions are pushed on the stack,
2648 // irrespective of whether all collection set regions are below the
2649 // finger, if the region stack is not empty. This is expected to be
2650 // a rare case, so I don't think it's necessary to be smarter about it.
2651 if (!region_stack_empty())
2652 _should_gray_objects = true;
2653 }
2654
2655 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2656 if (!concurrent_marking_in_progress())
2657 return;
2658
2659 HeapWord* region_end = hr->end();
2660 if (region_end > _min_finger)
2661 _should_gray_objects = true;
2662 }
2663
2664 // abandon current marking iteration due to a Full GC
2665 void ConcurrentMark::abort() {
2666 // Clear all marks to force marking thread to do nothing
2667 _nextMarkBitMap->clearAll();
2668 // Empty mark stack
2669 clear_marking_state();
2670 for (int i = 0; i < (int)_max_task_num; ++i)
2671 _tasks[i]->clear_region_fields();
2672 _has_aborted = true;
2673
2674 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2675 satb_mq_set.abandon_partial_marking();
2676 // This can be called either during or outside marking, we'll read
2677 // the expected_active value from the SATB queue set.
2678 satb_mq_set.set_active_all_threads( 2679 false, /* new active value */ 2680 satb_mq_set.is_active() /* expected_active */); 2681 } 2682 2683 static void print_ms_time_info(const char* prefix, const char* name, 2684 NumberSeq& ns) { 2685 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2686 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2687 if (ns.num() > 0) { 2688 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2689 prefix, ns.sd(), ns.maximum()); 2690 } 2691 } 2692 2693 void ConcurrentMark::print_summary_info() { 2694 gclog_or_tty->print_cr(" Concurrent marking:"); 2695 print_ms_time_info(" ", "init marks", _init_times); 2696 print_ms_time_info(" ", "remarks", _remark_times); 2697 { 2698 print_ms_time_info(" ", "final marks", _remark_mark_times); 2699 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2700 2701 } 2702 print_ms_time_info(" ", "cleanups", _cleanup_times); 2703 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2704 _total_counting_time, 2705 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 2706 (double)_cleanup_times.num() 2707 : 0.0)); 2708 if (G1ScrubRemSets) { 2709 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2710 _total_rs_scrub_time, 2711 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 2712 (double)_cleanup_times.num() 2713 : 0.0)); 2714 } 2715 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 2716 (_init_times.sum() + _remark_times.sum() + 2717 _cleanup_times.sum())/1000.0); 2718 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 2719 "(%8.2f s marking, %8.2f s counting).", 2720 cmThread()->vtime_accum(), 2721 cmThread()->vtime_mark_accum(), 2722 cmThread()->vtime_count_accum()); 2723 } 2724 2725 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2726 _parallel_workers->print_worker_threads_on(st); 2727 } 2728 2729 // Closures 2730 // XXX: there seems to be a lot of code duplication here; 2731 // should refactor and consolidate the shared code. 2732 2733 // This closure is used to mark refs into the CMS generation in 2734 // the CMS bit map. Called at the first checkpoint. 2735 2736 // We take a break if someone is trying to stop the world. 
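// Each parallel marking worker calls do_yield_check() periodically;
// worker 0 additionally brackets the pause with the policy's
// record_concurrent_pause()/record_concurrent_pause_end() calls so the
// time spent yielding is accounted for.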
2737 bool ConcurrentMark::do_yield_check(int worker_i) { 2738 if (should_yield()) { 2739 if (worker_i == 0) 2740 _g1h->g1_policy()->record_concurrent_pause(); 2741 cmThread()->yield(); 2742 if (worker_i == 0) 2743 _g1h->g1_policy()->record_concurrent_pause_end(); 2744 return true; 2745 } else { 2746 return false; 2747 } 2748 } 2749 2750 bool ConcurrentMark::should_yield() { 2751 return cmThread()->should_yield(); 2752 } 2753 2754 bool ConcurrentMark::containing_card_is_marked(void* p) { 2755 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 2756 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 2757 } 2758 2759 bool ConcurrentMark::containing_cards_are_marked(void* start, 2760 void* last) { 2761 return 2762 containing_card_is_marked(start) && 2763 containing_card_is_marked(last); 2764 } 2765 2766 #ifndef PRODUCT 2767 // for debugging purposes 2768 void ConcurrentMark::print_finger() { 2769 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 2770 _heap_start, _heap_end, _finger); 2771 for (int i = 0; i < (int) _max_task_num; ++i) { 2772 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 2773 } 2774 gclog_or_tty->print_cr(""); 2775 } 2776 #endif 2777 2778 // Closure for iteration over bitmaps 2779 class CMBitMapClosure : public BitMapClosure { 2780 private: 2781 // the bitmap that is being iterated over 2782 CMBitMap* _nextMarkBitMap; 2783 ConcurrentMark* _cm; 2784 CMTask* _task; 2785 // true if we're scanning a heap region claimed by the task (so that 2786 // we move the finger along), false if we're not, i.e. currently when 2787 // scanning a heap region popped from the region stack (so that we 2788 // do not move the task finger along; it'd be a mistake if we did so). 2789 bool _scanning_heap_region; 2790 2791 public: 2792 CMBitMapClosure(CMTask *task, 2793 ConcurrentMark* cm, 2794 CMBitMap* nextMarkBitMap) 2795 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2796 2797 void set_scanning_heap_region(bool scanning_heap_region) { 2798 _scanning_heap_region = scanning_heap_region; 2799 } 2800 2801 bool do_bit(size_t offset) { 2802 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2803 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2804 assert( addr < _cm->finger(), "invariant"); 2805 2806 if (_scanning_heap_region) { 2807 statsOnly( _task->increase_objs_found_on_bitmap() ); 2808 assert(addr >= _task->finger(), "invariant"); 2809 // We move that task's local finger along. 2810 _task->move_finger_to(addr); 2811 } else { 2812 // We move the task's region finger along. 2813 _task->move_region_finger_to(addr); 2814 } 2815 2816 _task->scan_object(oop(addr)); 2817 // we only partially drain the local queue and global stack 2818 _task->drain_local_queue(true); 2819 _task->drain_global_stack(true); 2820 2821 // if the has_aborted flag has been raised, we need to bail out of 2822 // the iteration 2823 return !_task->has_aborted(); 2824 } 2825 }; 2826 2827 // Closure for iterating over objects, currently only used for 2828 // processing SATB buffers. 
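// The entries in an SATB buffer are references recorded by the mutator
// pre-write barriers; handing each one to CMTask::deal_with_reference()
// preserves the snapshot-at-the-beginning invariant. In sketch form
// (excluded from the build; pre_write_barrier and satb_enqueue are
// illustrative names only, not HotSpot functions), the barrier executed
// before every reference store is:
#if 0
void pre_write_barrier(oop* field) {
  oop old_val = *field;       // value about to be overwritten
  if (old_val != NULL) {
    satb_enqueue(old_val);    // log it so marking still visits it
  }
}
#endif // 0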
2829 class CMObjectClosure : public ObjectClosure { 2830 private: 2831 CMTask* _task; 2832 2833 public: 2834 void do_object(oop obj) { 2835 _task->deal_with_reference(obj); 2836 } 2837 2838 CMObjectClosure(CMTask* task) : _task(task) { } 2839 }; 2840 2841 // Closure for iterating over object fields 2842 class CMOopClosure : public OopClosure { 2843 private: 2844 G1CollectedHeap* _g1h; 2845 ConcurrentMark* _cm; 2846 CMTask* _task; 2847 2848 public: 2849 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2850 virtual void do_oop( oop* p) { do_oop_work(p); } 2851 2852 template <class T> void do_oop_work(T* p) { 2853 assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant"); 2854 assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(), 2855 "invariant"); 2856 2857 oop obj = oopDesc::load_decode_heap_oop(p); 2858 if (_cm->verbose_high()) 2859 gclog_or_tty->print_cr("[%d] we're looking at location " 2860 "*"PTR_FORMAT" = "PTR_FORMAT, 2861 _task->task_id(), p, (void*) obj); 2862 _task->deal_with_reference(obj); 2863 } 2864 2865 CMOopClosure(G1CollectedHeap* g1h, 2866 ConcurrentMark* cm, 2867 CMTask* task) 2868 : _g1h(g1h), _cm(cm), _task(task) { } 2869 }; 2870 2871 void CMTask::setup_for_region(HeapRegion* hr) { 2872 // Separated the asserts so that we know which one fires. 2873 assert(hr != NULL, 2874 "claim_region() should have filtered out continues humongous regions"); 2875 assert(!hr->continuesHumongous(), 2876 "claim_region() should have filtered out continues humongous regions"); 2877 2878 if (_cm->verbose_low()) 2879 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 2880 _task_id, hr); 2881 2882 _curr_region = hr; 2883 _finger = hr->bottom(); 2884 update_region_limit(); 2885 } 2886 2887 void CMTask::update_region_limit() { 2888 HeapRegion* hr = _curr_region; 2889 HeapWord* bottom = hr->bottom(); 2890 HeapWord* limit = hr->next_top_at_mark_start(); 2891 2892 if (limit == bottom) { 2893 if (_cm->verbose_low()) 2894 gclog_or_tty->print_cr("[%d] found an empty region " 2895 "["PTR_FORMAT", "PTR_FORMAT")", 2896 _task_id, bottom, limit); 2897 // The region was collected underneath our feet. 2898 // We set the finger to bottom to ensure that the bitmap 2899 // iteration that will follow this will not do anything. 2900 // (this is not a condition that holds when we set the region up, 2901 // as the region is not supposed to be empty in the first place) 2902 _finger = bottom; 2903 } else if (limit >= _region_limit) { 2904 assert(limit >= _finger, "peace of mind"); 2905 } else { 2906 assert(limit < _region_limit, "only way to get here"); 2907 // This can happen under some pretty unusual circumstances. An 2908 // evacuation pause empties the region underneath our feet (NTAMS 2909 // at bottom). We then do some allocation in the region (NTAMS 2910 // stays at bottom), followed by the region being used as a GC 2911 // alloc region (NTAMS will move to top() and the objects 2912 // originally below it will be grayed). All objects now marked in 2913 // the region are explicitly grayed, if below the global finger, 2914 // and we do not need in fact to scan anything else. So, we simply 2915 // set _finger to be limit to ensure that the bitmap iteration 2916 // doesn't do anything. 
2917 _finger = limit;
2918 }
2919
2920 _region_limit = limit;
2921 }
2922
2923 void CMTask::giveup_current_region() {
2924 assert(_curr_region != NULL, "invariant");
2925 if (_cm->verbose_low())
2926 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2927 _task_id, _curr_region);
2928 clear_region_fields();
2929 }
2930
2931 void CMTask::clear_region_fields() {
2932 // Values for these three fields that indicate that we're not
2933 // holding on to a region.
2934 _curr_region = NULL;
2935 _finger = NULL;
2936 _region_limit = NULL;
2937
2938 _region_finger = NULL;
2939 }
2940
2941 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2942 guarantee(nextMarkBitMap != NULL, "invariant");
2943
2944 if (_cm->verbose_low())
2945 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2946
2947 _nextMarkBitMap = nextMarkBitMap;
2948 clear_region_fields();
2949
2950 _calls = 0;
2951 _elapsed_time_ms = 0.0;
2952 _termination_time_ms = 0.0;
2953 _termination_start_time_ms = 0.0;
2954
2955 #if _MARKING_STATS_
2956 _local_pushes = 0;
2957 _local_pops = 0;
2958 _local_max_size = 0;
2959 _objs_scanned = 0;
2960 _global_pushes = 0;
2961 _global_pops = 0;
2962 _global_max_size = 0;
2963 _global_transfers_to = 0;
2964 _global_transfers_from = 0;
2965 _region_stack_pops = 0;
2966 _regions_claimed = 0;
2967 _objs_found_on_bitmap = 0;
2968 _satb_buffers_processed = 0;
2969 _steal_attempts = 0;
2970 _steals = 0;
2971 _aborted = 0;
2972 _aborted_overflow = 0;
2973 _aborted_cm_aborted = 0;
2974 _aborted_yield = 0;
2975 _aborted_timed_out = 0;
2976 _aborted_satb = 0;
2977 _aborted_termination = 0;
2978 #endif // _MARKING_STATS_
2979 }
2980
2981 bool CMTask::should_exit_termination() {
2982 regular_clock_call();
2983 // This is called when we are in the termination protocol. We should
2984 // quit if, for some reason, this task wants to abort or the global
2985 // stack is not empty (this means that we can get work from it).
2986 return !_cm->mark_stack_empty() || has_aborted();
2987 }
2988
2989 // This determines whether the method below will check both the local
2990 // and global fingers when determining whether to push on the stack a
2991 // gray object (value 1) or whether it will only check the global one
2992 // (value 0). The tradeoffs are that the former will be a bit more
2993 // accurate and possibly push less on the stack, but it might also be
2994 // a little bit slower.
2995
2996 #define _CHECK_BOTH_FINGERS_ 1
2997
2998 void CMTask::deal_with_reference(oop obj) {
2999 if (_cm->verbose_high())
3000 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
3001 _task_id, (void*) obj);
3002
3003 ++_refs_reached;
3004
3005 HeapWord* objAddr = (HeapWord*) obj;
3006 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3007 if (_g1h->is_in_g1_reserved(objAddr)) {
3008 assert(obj != NULL, "is_in_g1_reserved should ensure this");
3009 HeapRegion* hr = _g1h->heap_region_containing(obj);
3010 if (_g1h->is_obj_ill(obj, hr)) {
3011 if (_cm->verbose_high())
3012 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3013 _task_id, (void*) obj);
3014
3015 // we need to mark it first
3016 if (_nextMarkBitMap->parMark(objAddr)) {
3017 // No OrderAccess::store_load() is needed. It is implicit in the
3018 // CAS done in parMark(objAddr) above
3019 HeapWord* global_finger = _cm->finger();
3020
3021 #if _CHECK_BOTH_FINGERS_
3022 // we will check both the local and global fingers
3023
3024 if (_finger != NULL && objAddr < _finger) {
3025 if (_cm->verbose_high())
3026 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3027 "pushing it", _task_id, _finger);
3028 push(obj);
3029 } else if (_curr_region != NULL && objAddr < _region_limit) {
3030 // do nothing
3031 } else if (objAddr < global_finger) {
3032 // Notice that the global finger might be moving forward
3033 // concurrently. This is not a problem. In the worst case, we
3034 // mark the object while it is above the global finger and, by
3035 // the time we read the global finger, it has moved forward
3036 // past this object. In this case, the object will probably
3037 // be visited when a task is scanning the region and will also
3038 // be pushed on the stack. So, some duplicate work, but no
3039 // correctness problems.
3040
3041 if (_cm->verbose_high())
3042 gclog_or_tty->print_cr("[%d] below the global finger "
3043 "("PTR_FORMAT"), pushing it",
3044 _task_id, global_finger);
3045 push(obj);
3046 } else {
3047 // do nothing
3048 }
3049 #else // _CHECK_BOTH_FINGERS_
3050 // we will only check the global finger
3051
3052 if (objAddr < global_finger) {
3053 // see long comment above
3054
3055 if (_cm->verbose_high())
3056 gclog_or_tty->print_cr("[%d] below the global finger "
3057 "("PTR_FORMAT"), pushing it",
3058 _task_id, global_finger);
3059 push(obj);
3060 }
3061 #endif // _CHECK_BOTH_FINGERS_
3062 }
3063 }
3064 }
3065 }
3066
3067 void CMTask::push(oop obj) {
3068 HeapWord* objAddr = (HeapWord*) obj;
3069 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3070 assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
3071 "invariant");
3072 assert(!_g1h->is_obj_ill(obj), "invariant");
3073 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3074
3075 if (_cm->verbose_high())
3076 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3077
3078 if (!_task_queue->push(obj)) {
3079 // The local task queue looks full. We need to push some entries
3080 // to the global stack.
3081
3082 if (_cm->verbose_medium())
3083 gclog_or_tty->print_cr("[%d] task queue overflow, "
3084 "moving entries to the global stack",
3085 _task_id);
3086 move_entries_to_global_stack();
3087
3088 // this should succeed since, even if we overflow the global
3089 // stack, we should have definitely removed some entries from the
3090 // local queue. So, there must be space on it.
3091 bool success = _task_queue->push(obj);
3092 assert(success, "invariant");
3093 }
3094
3095 statsOnly( int tmp_size = _task_queue->size();
3096 if (tmp_size > _local_max_size)
3097 _local_max_size = tmp_size;
3098 ++_local_pushes );
3099 }
3100
3101 void CMTask::reached_limit() {
3102 assert(_words_scanned >= _words_scanned_limit ||
3103 _refs_reached >= _refs_reached_limit,
3104 "shouldn't have been called otherwise");
3105 regular_clock_call();
3106 }
3107
3108 void CMTask::regular_clock_call() {
3109 if (has_aborted())
3110 return;
3111
3112 // First, we need to recalculate the words scanned and refs reached
3113 // limits for the next clock call.
3114 recalculate_limits();
3115
3116 // During the regular clock call we do the following
3117
3118 // (1) If an overflow has been flagged, then we abort.
3119 if (_cm->has_overflown()) {
3120 set_has_aborted();
3121 return;
3122 }
3123
3124 // If we are not concurrent (i.e. we're doing remark) we don't need
3125 // to check anything else. The other steps are only needed during
3126 // the concurrent marking phase.
3127 if (!concurrent())
3128 return;
3129
3130 // (2) If marking has been aborted for Full GC, then we also abort.
3131 if (_cm->has_aborted()) {
3132 set_has_aborted();
3133 statsOnly( ++_aborted_cm_aborted );
3134 return;
3135 }
3136
3137 double curr_time_ms = os::elapsedVTime() * 1000.0;
3138
3139 // (3) If marking stats are enabled, then we update the step history.
3140 #if _MARKING_STATS_
3141 if (_words_scanned >= _words_scanned_limit)
3142 ++_clock_due_to_scanning;
3143 if (_refs_reached >= _refs_reached_limit)
3144 ++_clock_due_to_marking;
3145
3146 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3147 _interval_start_time_ms = curr_time_ms;
3148 _all_clock_intervals_ms.add(last_interval_ms);
3149
3150 if (_cm->verbose_medium()) {
3151 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3152 "scanned = %d%s, refs reached = %d%s",
3153 _task_id, last_interval_ms,
3154 _words_scanned,
3155 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3156 _refs_reached,
3157 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3158 }
3159 #endif // _MARKING_STATS_
3160
3161 // (4) We check whether we should yield. If we have to, then we abort.
3162 if (_cm->should_yield()) {
3163 // We should yield. To do this we abort the task. The caller is
3164 // responsible for yielding.
3165 set_has_aborted();
3166 statsOnly( ++_aborted_yield );
3167 return;
3168 }
3169
3170 // (5) We check whether we've reached our time quota. If we have,
3171 // then we abort.
3172 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3173 if (elapsed_time_ms > _time_target_ms) {
3174 set_has_aborted();
3175 _has_aborted_timed_out = true;
3176 statsOnly( ++_aborted_timed_out );
3177 return;
3178 }
3179
3180 // (6) Finally, we check whether there are enough completed SATB
3181 // buffers available for processing. If there are, we abort.
3182 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3183 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3184 if (_cm->verbose_low())
3185 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3186 _task_id);
3187 // we do need to process SATB buffers, we'll abort and restart
3188 // the marking task to do so
3189 set_has_aborted();
3190 statsOnly( ++_aborted_satb );
3191 return;
3192 }
3193 }
3194
3195 void CMTask::recalculate_limits() {
3196 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3197 _words_scanned_limit = _real_words_scanned_limit;
3198
3199 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3200 _refs_reached_limit = _real_refs_reached_limit;
3201 }
3202
3203 void CMTask::decrease_limits() {
3204 // This is called when we believe that we're going to do an infrequent
3205 // operation which will increase the per byte scanned cost (i.e. move
3206 // entries to/from the global stack). It basically tries to decrease the
3207 // scanning limit so that the clock is called earlier.
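// Concretely (see the assignments below): the limits are pulled back by
// three quarters of a period, so the next regular_clock_call() happens
// after only about a quarter of the usual scanning/marking period.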
3208
3209 if (_cm->verbose_medium())
3210 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3211
3212 _words_scanned_limit = _real_words_scanned_limit -
3213 3 * words_scanned_period / 4;
3214 _refs_reached_limit = _real_refs_reached_limit -
3215 3 * refs_reached_period / 4;
3216 }
3217
3218 void CMTask::move_entries_to_global_stack() {
3219 // local array where we'll store the entries that will be popped
3220 // from the local queue
3221 oop buffer[global_stack_transfer_size];
3222
3223 int n = 0;
3224 oop obj;
3225 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3226 buffer[n] = obj;
3227 ++n;
3228 }
3229
3230 if (n > 0) {
3231 // we popped at least one entry from the local queue
3232
3233 statsOnly( ++_global_transfers_to; _local_pops += n );
3234
3235 if (!_cm->mark_stack_push(buffer, n)) {
3236 if (_cm->verbose_low())
3237 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3238 set_has_aborted();
3239 } else {
3240 // the transfer was successful
3241
3242 if (_cm->verbose_medium())
3243 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3244 _task_id, n);
3245 statsOnly( int tmp_size = _cm->mark_stack_size();
3246 if (tmp_size > _global_max_size)
3247 _global_max_size = tmp_size;
3248 _global_pushes += n );
3249 }
3250 }
3251
3252 // this operation was quite expensive, so decrease the limits
3253 decrease_limits();
3254 }
3255
3256 void CMTask::get_entries_from_global_stack() {
3257 // local array where we'll store the entries that will be popped
3258 // from the global stack.
3259 oop buffer[global_stack_transfer_size];
3260 int n;
3261 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3262 assert(n <= global_stack_transfer_size,
3263 "we should not pop more than the given limit");
3264 if (n > 0) {
3265 // yes, we did actually pop at least one entry
3266
3267 statsOnly( ++_global_transfers_from; _global_pops += n );
3268 if (_cm->verbose_medium())
3269 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3270 _task_id, n);
3271 for (int i = 0; i < n; ++i) {
3272 bool success = _task_queue->push(buffer[i]);
3273 // We only call this when the local queue is empty or under a
3274 // given target limit. So, we do not expect this push to fail.
3275 assert(success, "invariant");
3276 }
3277
3278 statsOnly( int tmp_size = _task_queue->size();
3279 if (tmp_size > _local_max_size)
3280 _local_max_size = tmp_size;
3281 _local_pushes += n );
3282 }
3283
3284 // this operation was quite expensive, so decrease the limits
3285 decrease_limits();
3286 }
3287
3288 void CMTask::drain_local_queue(bool partially) {
3289 if (has_aborted())
3290 return;
3291
3292 // Decide what the target size is, depending on whether we're going to
3293 // drain it partially (so that other tasks can steal if they run out
3294 // of things to do) or totally (at the very end).
3295 size_t target_size;
3296 if (partially)
3297 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3298 else
3299 target_size = 0;
3300
3301 if (_task_queue->size() > target_size) {
3302 if (_cm->verbose_high())
3303 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3304 _task_id, target_size);
3305
3306 oop obj;
3307 bool ret = _task_queue->pop_local(obj);
3308 while (ret) {
3309 statsOnly( ++_local_pops );
3310
3311 if (_cm->verbose_high())
3312 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3313 (void*) obj);
3314
3315 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3316 assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
3317 "invariant");
3318
3319 scan_object(obj);
3320
3321 if (_task_queue->size() <= target_size || has_aborted())
3322 ret = false;
3323 else
3324 ret = _task_queue->pop_local(obj);
3325 }
3326
3327 if (_cm->verbose_high())
3328 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3329 _task_id, _task_queue->size());
3330 }
3331 }
3332
3333 void CMTask::drain_global_stack(bool partially) {
3334 if (has_aborted())
3335 return;
3336
3337 // We have a policy to drain the local queue before we attempt to
3338 // drain the global stack.
3339 assert(partially || _task_queue->size() == 0, "invariant");
3340
3341 // Decide what the target size is, depending on whether we're going to
3342 // drain it partially (so that other tasks can steal if they run out
3343 // of things to do) or totally (at the very end). Notice that,
3344 // because we move entries from the global stack in chunks or
3345 // because another task might be doing the same, we might in fact
3346 // drop below the target. But, this is not a problem.
3347 size_t target_size;
3348 if (partially)
3349 target_size = _cm->partial_mark_stack_size_target();
3350 else
3351 target_size = 0;
3352
3353 if (_cm->mark_stack_size() > target_size) {
3354 if (_cm->verbose_low())
3355 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3356 _task_id, target_size);
3357
3358 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3359 get_entries_from_global_stack();
3360 drain_local_queue(partially);
3361 }
3362
3363 if (_cm->verbose_low())
3364 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3365 _task_id, _cm->mark_stack_size());
3366 }
3367 }
3368
3369 // SATB Queue has several assumptions on whether to call the par or
3370 // non-par versions of the methods. This is why some of the code is
3371 // replicated. We should really get rid of the single-threaded version
3372 // of the code to simplify things.
3373 void CMTask::drain_satb_buffers() {
3374 if (has_aborted())
3375 return;
3376
3377 // We set this so that the regular clock knows that we're in the
3378 // middle of draining buffers and doesn't set the abort flag when it
3379 // notices that SATB buffers are available for draining. It'd be
3380 // very counterproductive if it did that. :-)
3381 _draining_satb_buffers = true;
3382
3383 CMObjectClosure oc(this);
3384 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3385 if (ParallelGCThreads > 0)
3386 satb_mq_set.set_par_closure(_task_id, &oc);
3387 else
3388 satb_mq_set.set_closure(&oc);
3389
3390 // This keeps claiming and applying the closure to completed buffers
3391 // until we run out of buffers or we need to abort.
3392 if (ParallelGCThreads > 0) { 3393 while (!has_aborted() && 3394 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 3395 if (_cm->verbose_medium()) 3396 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3397 statsOnly( ++_satb_buffers_processed ); 3398 regular_clock_call(); 3399 } 3400 } else { 3401 while (!has_aborted() && 3402 satb_mq_set.apply_closure_to_completed_buffer()) { 3403 if (_cm->verbose_medium()) 3404 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3405 statsOnly( ++_satb_buffers_processed ); 3406 regular_clock_call(); 3407 } 3408 } 3409 3410 if (!concurrent() && !has_aborted()) { 3411 // We should only do this during remark. 3412 if (ParallelGCThreads > 0) 3413 satb_mq_set.par_iterate_closure_all_threads(_task_id); 3414 else 3415 satb_mq_set.iterate_closure_all_threads(); 3416 } 3417 3418 _draining_satb_buffers = false; 3419 3420 assert(has_aborted() || 3421 concurrent() || 3422 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3423 3424 if (ParallelGCThreads > 0) 3425 satb_mq_set.set_par_closure(_task_id, NULL); 3426 else 3427 satb_mq_set.set_closure(NULL); 3428 3429 // again, this was a potentially expensive operation, decrease the 3430 // limits to get the regular clock call early 3431 decrease_limits(); 3432 } 3433 3434 void CMTask::drain_region_stack(BitMapClosure* bc) { 3435 if (has_aborted()) 3436 return; 3437 3438 assert(_region_finger == NULL, 3439 "it should be NULL when we're not scanning a region"); 3440 3441 if (!_cm->region_stack_empty()) { 3442 if (_cm->verbose_low()) 3443 gclog_or_tty->print_cr("[%d] draining region stack, size = %d", 3444 _task_id, _cm->region_stack_size()); 3445 3446 MemRegion mr = _cm->region_stack_pop_with_lock(); 3447 // it returns MemRegion() if the pop fails 3448 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 3449 3450 while (mr.start() != NULL) { 3451 if (_cm->verbose_medium()) 3452 gclog_or_tty->print_cr("[%d] we are scanning region " 3453 "["PTR_FORMAT", "PTR_FORMAT")", 3454 _task_id, mr.start(), mr.end()); 3455 assert(mr.end() <= _cm->finger(), 3456 "otherwise the region shouldn't be on the stack"); 3457 assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); 3458 if (_nextMarkBitMap->iterate(bc, mr)) { 3459 assert(!has_aborted(), 3460 "cannot abort the task without aborting the bitmap iteration"); 3461 3462 // We finished iterating over the region without aborting. 3463 regular_clock_call(); 3464 if (has_aborted()) 3465 mr = MemRegion(); 3466 else { 3467 mr = _cm->region_stack_pop_with_lock(); 3468 // it returns MemRegion() if the pop fails 3469 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 3470 } 3471 } else { 3472 assert(has_aborted(), "currently the only way to do so"); 3473 3474 // The only way to abort the bitmap iteration is to return 3475 // false from the do_bit() method. However, inside the 3476 // do_bit() method we move the _region_finger to point to the 3477 // object currently being looked at. So, if we bail out, we 3478 // have definitely set _region_finger to something non-null. 3479 assert(_region_finger != NULL, "invariant"); 3480 3481 // The iteration was actually aborted. So now _region_finger 3482 // points to the address of the object we last scanned. If we 3483 // leave it there, when we restart this task, we will rescan 3484 // the object. It is easy to avoid this. 
We move the finger by
3485         // enough to point to the next possible object header (the
3486         // bitmap knows by how much we need to move it as it knows its
3487         // granularity).
3488         MemRegion newRegion =
3489           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3490
3491         if (!newRegion.is_empty()) {
3492           if (_cm->verbose_low()) {
3493             gclog_or_tty->print_cr("[%d] pushing unscanned region "
3494                                    "["PTR_FORMAT", "PTR_FORMAT") on region stack",
3495                                    _task_id,
3496                                    newRegion.start(), newRegion.end());
3497           }
3498           // Now push the part of the region we didn't scan on the
3499           // region stack to make sure a task scans it later.
3500           _cm->region_stack_push_with_lock(newRegion);
3501         }
3502         // break from while
3503         mr = MemRegion();
3504       }
3505       _region_finger = NULL;
3506     }
3507
3508     if (_cm->verbose_low())
3509       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3510                              _task_id, _cm->region_stack_size());
3511   }
3512 }
3513
3514 void CMTask::print_stats() {
3515   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3516                          _task_id, _calls);
3517   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3518                          _elapsed_time_ms, _termination_time_ms);
3519   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3520                          _step_times_ms.num(), _step_times_ms.avg(),
3521                          _step_times_ms.sd());
3522   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3523                          _step_times_ms.maximum(), _step_times_ms.sum());
3524
3525 #if _MARKING_STATS_
3526   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3527                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3528                          _all_clock_intervals_ms.sd());
3529   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3530                          _all_clock_intervals_ms.maximum(),
3531                          _all_clock_intervals_ms.sum());
3532   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3533                          _clock_due_to_scanning, _clock_due_to_marking);
3534   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3535                          _objs_scanned, _objs_found_on_bitmap);
3536   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3537                          _local_pushes, _local_pops, _local_max_size);
3538   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3539                          _global_pushes, _global_pops, _global_max_size);
3540   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3541                          _global_transfers_to, _global_transfers_from);
3542   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
3543                          _regions_claimed, _region_stack_pops);
3544   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3545   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3546                          _steal_attempts, _steals);
3547   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3548   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3549                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3550   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3551                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3552 #endif // _MARKING_STATS_
3553 }
3554
3555 /*****************************************************************************
3556
3557   The do_marking_step(time_target_ms) method is the building block
3558   of the parallel marking framework. It can be called in parallel
3559   with other invocations of do_marking_step() on different tasks
3560   (but only one per task, obviously) and concurrently with the
3561   mutator threads, or during remark, hence it eliminates the need
3562   for two versions of the code. When called during remark, it will
3563   pick up from where the task left off during the concurrent marking
3564   phase. Interestingly, tasks are also claimable during evacuation
3565   pauses, since do_marking_step() ensures that it aborts before
3566   it needs to yield.
3567
3568   The data structures that it uses to do marking work are the
3569   following:
3570
3571   (1) Marking Bitmap. If there are gray objects that appear only
3572   on the bitmap (this happens either when dealing with an overflow
3573   or when the initial marking phase has simply marked the roots
3574   and didn't push them on the stack), then tasks claim heap
3575   regions whose bitmap they then scan to find gray objects. A
3576   global finger indicates where the end of the last claimed region
3577   is. A local finger indicates how far into the region a task has
3578   scanned. The two fingers are used to determine how to gray an
3579   object (i.e. whether simply marking it is OK, as it will be
3580   visited by a task in the future, or whether it also needs to be
3581   pushed on a stack).
3582
3583   (2) Local Queue. The task's local queue, which the task can
3584   access reasonably efficiently. Other tasks can steal from
3585   it when they run out of work. Throughout the marking phase, a
3586   task attempts to keep its local queue short but not totally
3587   empty, so that entries are available for stealing by other
3588   tasks. Only when there is no more work will a task totally
3589   drain its local queue.
3590
3591   (3) Global Mark Stack. This handles local queue overflow. During
3592   marking, only batches of entries are moved between it and the
3593   local queues, as access to it requires a mutex and finer-grained
3594   interaction with it might cause contention. If it
3595   overflows, then the marking phase should restart and iterate
3596   over the bitmap to identify gray objects. Throughout the marking
3597   phase, tasks attempt to keep the global mark stack short
3598   but not totally empty, so that entries are available for
3599   popping by other tasks. Only when there is no more work will
3600   tasks totally drain the global mark stack.
3601
3602   (4) Global Region Stack. Entries on it correspond to areas of
3603   the bitmap that need to be scanned since they contain gray
3604   objects. Pushes on the region stack only happen during
3605   evacuation pauses and typically correspond to areas covered by
3606   GC LABs. If it overflows, then the marking phase should restart
3607   and iterate over the bitmap to identify gray objects. Tasks will
3608   try to totally drain the region stack as soon as possible.
3609
3610   (5) SATB Buffer Queue. This is where completed SATB buffers are
3611   made available. Buffers are regularly removed from this queue
3612   and scanned for roots, so that the queue doesn't get too
3613   long. During remark, all completed buffers are processed, as
3614   well as the filled-in parts of any uncompleted buffers.
3615
3616   The do_marking_step() method tries to abort when the time target
3617   has been reached. There are a few other cases when the
3618   do_marking_step() method also aborts:
3619
3620   (1) When the marking phase has been aborted (after a Full GC).
3621
3622   (2) When a global overflow (either on the global stack or the
3623   region stack) has been triggered. Before the task aborts, it
3624   will actually sync up with the other tasks to ensure that all
3625   the marking data structures (local queues, stacks, fingers etc.)
3626   are re-initialised so that when do_marking_step() completes,
3627   the marking phase can immediately restart.
3628
3629   (3) When enough completed SATB buffers are available. The
3630   do_marking_step() method only tries to drain SATB buffers right
3631   at the beginning. So, if enough buffers are available, the
3632   marking step aborts and the SATB buffers are processed at
3633   the beginning of the next invocation.
3634
3635   (4) To yield. When we have to yield, we abort and yield
3636   right at the end of do_marking_step(). This saves us a lot
3637   of hassle as, by yielding, we might allow a Full GC. If this
3638   happens then objects will be compacted underneath our feet, the
3639   heap might shrink, etc. We save checking for this by just
3640   aborting and doing the yield right at the end.
3641
3642   From the above it follows that the do_marking_step() method should
3643   be called in a loop (or, otherwise, regularly) until it completes.
3644
3645   If a marking step completes without its has_aborted() flag being
3646   true, it means it has completed the current marking phase (and
3647   also all other marking tasks have done so and have all synced up).
3648
3649   A method called regular_clock_call() is invoked "regularly" (in
3650   sub-millisecond intervals) throughout marking. It is this clock
3651   method that checks all the abort conditions mentioned above and
3652   decides when the task should abort. A work-based scheme is used
3653   to trigger the clock method: it is called when the number of
3654   object words the marking phase has scanned, or the number of
3655   references it has visited, reaches a given limit. Additional
3656   invocations of the clock method have been planted in a few other
3657   strategic places too. The initial reason for the clock method was
3658   to avoid calling vtime too regularly, as it is quite expensive; once
3659   it was in place, it was natural to piggy-back all the other
3660   conditions on it too and not constantly check them throughout the code.
3661
3662  *****************************************************************************/
3663
3664 void CMTask::do_marking_step(double time_target_ms) {
3665   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3666   assert(concurrent() == _cm->concurrent(), "they should be the same");
3667
3668   assert(concurrent() || _cm->region_stack_empty(),
3669          "the region stack should have been cleared before remark");
3670   assert(_region_finger == NULL,
3671          "this should be non-null only when a region is being scanned");
3672
3673   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3674   assert(_task_queues != NULL, "invariant");
3675   assert(_task_queue != NULL, "invariant");
3676   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3677
3678   assert(!_claimed,
3679          "only one thread should claim this task at any one time");
3680
3681   // OK, this doesn't safeguard against all possible scenarios, as it is
3682   // possible for two threads to set the _claimed flag at the same
3683   // time. But it is only for debugging purposes anyway and it will
3684   // catch most problems.
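// [Illustrative sketch, disabled; not HotSpot code] The comment above
// notes that a plain store to _claimed cannot exclude two racing
// claimers; it only has to be good enough for debugging. A watertight
// claim would use a compare-and-swap, e.g. with standard C++ atomics
// (not the HotSpot Atomic:: API):
#if 0
#include <atomic>

std::atomic<bool> claimed(false);

bool try_claim() {
  bool expected = false;
  // Succeeds for exactly one thread; every other thread sees
  // expected flipped to true and gets false back.
  return claimed.compare_exchange_strong(expected, true);
}
#endif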
3685 _claimed = true; 3686 3687 _start_time_ms = os::elapsedVTime() * 1000.0; 3688 statsOnly( _interval_start_time_ms = _start_time_ms ); 3689 3690 double diff_prediction_ms = 3691 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 3692 _time_target_ms = time_target_ms - diff_prediction_ms; 3693 3694 // set up the variables that are used in the work-based scheme to 3695 // call the regular clock method 3696 _words_scanned = 0; 3697 _refs_reached = 0; 3698 recalculate_limits(); 3699 3700 // clear all flags 3701 clear_has_aborted(); 3702 _has_aborted_timed_out = false; 3703 _draining_satb_buffers = false; 3704 3705 ++_calls; 3706 3707 if (_cm->verbose_low()) 3708 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " 3709 "target = %1.2lfms >>>>>>>>>>", 3710 _task_id, _calls, _time_target_ms); 3711 3712 // Set up the bitmap and oop closures. Anything that uses them is 3713 // eventually called from this method, so it is OK to allocate these 3714 // statically. 3715 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 3716 CMOopClosure oop_closure(_g1h, _cm, this); 3717 set_oop_closure(&oop_closure); 3718 3719 if (_cm->has_overflown()) { 3720 // This can happen if the region stack or the mark stack overflows 3721 // during a GC pause and this task, after a yield point, 3722 // restarts. We have to abort as we need to get into the overflow 3723 // protocol which happens right at the end of this task. 3724 set_has_aborted(); 3725 } 3726 3727 // First drain any available SATB buffers. After this, we will not 3728 // look at SATB buffers before the next invocation of this method. 3729 // If enough completed SATB buffers are queued up, the regular clock 3730 // will abort this task so that it restarts. 3731 drain_satb_buffers(); 3732 // ...then partially drain the local queue and the global stack 3733 drain_local_queue(true); 3734 drain_global_stack(true); 3735 3736 // Then totally drain the region stack. We will not look at 3737 // it again before the next invocation of this method. Entries on 3738 // the region stack are only added during evacuation pauses, for 3739 // which we have to yield. When we do, we abort the task anyway so 3740 // it will look at the region stack again when it restarts. 3741 bitmap_closure.set_scanning_heap_region(false); 3742 drain_region_stack(&bitmap_closure); 3743 // ...then partially drain the local queue and the global stack 3744 drain_local_queue(true); 3745 drain_global_stack(true); 3746 3747 do { 3748 if (!has_aborted() && _curr_region != NULL) { 3749 // This means that we're already holding on to a region. 3750 assert(_finger != NULL, "if region is not NULL, then the finger " 3751 "should not be NULL either"); 3752 3753 // We might have restarted this task after an evacuation pause 3754 // which might have evacuated the region we're holding on to 3755 // underneath our feet. Let's read its limit again to make sure 3756 // that we do not iterate over a region of the heap that 3757 // contains garbage (update_region_limit() will also move 3758 // _finger to the start of the region if it is found empty). 3759 update_region_limit(); 3760 // We will start from _finger not from the start of the region, 3761 // as we might be restarting this task after aborting half-way 3762 // through scanning this region. In this case, _finger points to 3763 // the address where we last found a marked object. If this is a 3764 // fresh region, _finger points to start(). 
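// [Illustrative sketch, disabled; not HotSpot code] The scan below is
// resumable: it iterates the bitmap from _finger, and after an abort a
// later invocation simply re-enters with the saved finger. A minimal
// model of that pattern (std::vector<bool> stands in for the mark
// bitmap; all names are invented for the sketch):
#if 0
#include <cstddef>
#include <vector>

// Returns true if the scan reached 'limit'; false if the visitor asked
// to abort. 'finger' is updated in place, so the next call resumes
// exactly where this one stopped.
static bool scan_marked(const std::vector<bool>& bitmap, size_t& finger,
                        size_t limit, bool (*visit)(size_t)) {
  while (finger < limit) {
    if (bitmap[finger] && !visit(finger)) {
      // Aborted mid-scan: step past the current entry so the next
      // call does not rescan it (cf. nextWord() below).
      ++finger;
      return false;
    }
    ++finger;
  }
  return true;
}
#endif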
3765 MemRegion mr = MemRegion(_finger, _region_limit); 3766 3767 if (_cm->verbose_low()) 3768 gclog_or_tty->print_cr("[%d] we're scanning part " 3769 "["PTR_FORMAT", "PTR_FORMAT") " 3770 "of region "PTR_FORMAT, 3771 _task_id, _finger, _region_limit, _curr_region); 3772 3773 // Let's iterate over the bitmap of the part of the 3774 // region that is left. 3775 bitmap_closure.set_scanning_heap_region(true); 3776 if (mr.is_empty() || 3777 _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 3778 // We successfully completed iterating over the region. Now, 3779 // let's give up the region. 3780 giveup_current_region(); 3781 regular_clock_call(); 3782 } else { 3783 assert(has_aborted(), "currently the only way to do so"); 3784 // The only way to abort the bitmap iteration is to return 3785 // false from the do_bit() method. However, inside the 3786 // do_bit() method we move the _finger to point to the 3787 // object currently being looked at. So, if we bail out, we 3788 // have definitely set _finger to something non-null. 3789 assert(_finger != NULL, "invariant"); 3790 3791 // Region iteration was actually aborted. So now _finger 3792 // points to the address of the object we last scanned. If we 3793 // leave it there, when we restart this task, we will rescan 3794 // the object. It is easy to avoid this. We move the finger by 3795 // enough to point to the next possible object header (the 3796 // bitmap knows by how much we need to move it as it knows its 3797 // granularity). 3798 assert(_finger < _region_limit, "invariant"); 3799 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 3800 // Check if bitmap iteration was aborted while scanning the last object 3801 if (new_finger >= _region_limit) { 3802 giveup_current_region(); 3803 } else { 3804 move_finger_to(new_finger); 3805 } 3806 } 3807 } 3808 // At this point we have either completed iterating over the 3809 // region we were holding on to, or we have aborted. 3810 3811 // We then partially drain the local queue and the global stack. 3812 // (Do we really need this?) 3813 drain_local_queue(true); 3814 drain_global_stack(true); 3815 3816 // Read the note on the claim_region() method on why it might 3817 // return NULL with potentially more regions available for 3818 // claiming and why we have to check out_of_regions() to determine 3819 // whether we're done or not. 3820 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 3821 // We are going to try to claim a new region. We should have 3822 // given up on the previous one. 3823 // Separated the asserts so that we know which one fires. 3824 assert(_curr_region == NULL, "invariant"); 3825 assert(_finger == NULL, "invariant"); 3826 assert(_region_limit == NULL, "invariant"); 3827 if (_cm->verbose_low()) 3828 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 3829 HeapRegion* claimed_region = _cm->claim_region(_task_id); 3830 if (claimed_region != NULL) { 3831 // Yes, we managed to claim one 3832 statsOnly( ++_regions_claimed ); 3833 3834 if (_cm->verbose_low()) 3835 gclog_or_tty->print_cr("[%d] we successfully claimed " 3836 "region "PTR_FORMAT, 3837 _task_id, claimed_region); 3838 3839 setup_for_region(claimed_region); 3840 assert(_curr_region == claimed_region, "invariant"); 3841 } 3842 // It is important to call the regular clock here. It might take 3843 // a while to claim a region if, for example, we hit a large 3844 // block of empty regions. 
So we need to call the regular clock
3845       // method once round the loop to make sure it's called
3846       // frequently enough.
3847       regular_clock_call();
3848     }
3849
3850     if (!has_aborted() && _curr_region == NULL) {
3851       assert(_cm->out_of_regions(),
3852              "at this point we should be out of regions");
3853     }
3854   } while (_curr_region != NULL && !has_aborted());
3855
3856   if (!has_aborted()) {
3857     // We cannot check whether the global stack is empty, since other
3858     // tasks might be pushing objects to it concurrently. We also cannot
3859     // check if the region stack is empty because if a thread is aborting
3860     // it can push a partially done region back.
3861     assert(_cm->out_of_regions(),
3862            "at this point we should be out of regions");
3863
3864     if (_cm->verbose_low())
3865       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3866
3867     // Try to reduce the number of available SATB buffers so that
3868     // remark has less work to do.
3869     drain_satb_buffers();
3870   }
3871
3872   // Since we've done everything else, we can now totally drain the
3873   // local queue and global stack.
3874   drain_local_queue(false);
3875   drain_global_stack(false);
3876
3877   // Attempt work stealing from other tasks' queues.
3878   if (!has_aborted()) {
3879     // We have not aborted. This means that we have finished all that
3880     // we could. Let's try to do some stealing...
3881
3882     // We cannot check whether the global stack is empty, since other
3883     // tasks might be pushing objects to it concurrently. We also cannot
3884     // check if the region stack is empty because if a thread is aborting
3885     // it can push a partially done region back.
3886     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3887            "only way to reach here");
3888
3889     if (_cm->verbose_low())
3890       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3891
3892     while (!has_aborted()) {
3893       oop obj;
3894       statsOnly( ++_steal_attempts );
3895
3896       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3897         if (_cm->verbose_medium())
3898           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3899                                  _task_id, (void*) obj);
3900
3901         statsOnly( ++_steals );
3902
3903         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3904                "any stolen object should be marked");
3905         scan_object(obj);
3906
3907         // And since we're towards the end, let's totally drain the
3908         // local queue and global stack.
3909         drain_local_queue(false);
3910         drain_global_stack(false);
3911       } else {
3912         break;
3913       }
3914     }
3915   }
3916
3917   // If we still haven't aborted, let's try to get into the
3918   // termination protocol.
3919   if (!has_aborted()) {
3920     // We cannot check whether the global stack is empty, since other
3921     // tasks might be concurrently pushing objects on it. We also cannot
3922     // check if the region stack is empty because if a thread is aborting
3923     // it can push a partially done region back.
3924     // Separated the asserts so that we know which one fires.
3925     assert(_cm->out_of_regions(), "only way to reach here");
3926     assert(_task_queue->size() == 0, "only way to reach here");
3927
3928     if (_cm->verbose_low())
3929       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3930
3931     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3932     // The CMTask class also extends the TerminatorTerminator class,
3933     // hence its should_exit_termination() method will also decide
3934     // whether to exit the termination protocol or not.
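// [Illustrative sketch, disabled; not HotSpot code] A minimal model of
// the termination protocol entered below: a worker that runs dry
// "offers" termination; once every worker has offered, all of them
// finish, while a worker whose should-exit predicate fires withdraws
// its offer and goes back to work. Standard C++ atomics stand in for
// the real terminator (names invented for the sketch).
#if 0
#include <atomic>

struct TerminatorModel {
  std::atomic<int> _offered;
  const int        _n_workers;

  explicit TerminatorModel(int n) : _offered(0), _n_workers(n) {}

  // Returns true when every worker has offered termination; returns
  // false if this worker should resume because new work appeared.
  bool offer_termination(bool (*should_exit)()) {
    _offered.fetch_add(1);
    for (;;) {
      if (_offered.load() == _n_workers) return true;  // all idle: done
      if (should_exit()) {                             // work showed up
        _offered.fetch_sub(1);
        return false;
      }
      // A real implementation would spin/yield/sleep here, and must
      // also close the race between the last offer and a concurrent
      // withdrawal; this sketch elides both.
    }
  }
};
#endif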
3935     bool finished = _cm->terminator()->offer_termination(this);
3936     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3937     _termination_time_ms +=
3938       termination_end_time_ms - _termination_start_time_ms;
3939
3940     if (finished) {
3941       // We're all done.
3942
3943       if (_task_id == 0) {
3944         // let's allow task 0 to do this
3945         if (concurrent()) {
3946           assert(_cm->concurrent_marking_in_progress(), "invariant");
3947           // we need to set this to false before the next
3948           // safepoint. This way we ensure that the marking phase
3949           // doesn't observe any more heap expansions.
3950           _cm->clear_concurrent_marking_in_progress();
3951         }
3952       }
3953
3954       // We can now guarantee that the global stack is empty, since
3955       // all other tasks have finished. We separated the guarantees so
3956       // that, if a condition is false, we can immediately find out
3957       // which one.
3958       guarantee(_cm->out_of_regions(), "only way to reach here");
3959       guarantee(_cm->region_stack_empty(), "only way to reach here");
3960       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3961       guarantee(_task_queue->size() == 0, "only way to reach here");
3962       guarantee(!_cm->has_overflown(), "only way to reach here");
3963       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3964       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
3965
3966       if (_cm->verbose_low())
3967         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3968     } else {
3969       // Apparently there's more work to do. Let's abort this task; the
3970       // framework will restart it and we can hopefully find more things to do.
3971
3972       if (_cm->verbose_low())
3973         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3974
3975       set_has_aborted();
3976       statsOnly( ++_aborted_termination );
3977     }
3978   }
3979
3980   // Mainly for debugging purposes to make sure that a pointer to the
3981   // closure which was statically allocated in this frame doesn't
3982   // escape it by accident.
3983   set_oop_closure(NULL);
3984   double end_time_ms = os::elapsedVTime() * 1000.0;
3985   double elapsed_time_ms = end_time_ms - _start_time_ms;
3986   // Update the step history.
3987   _step_times_ms.add(elapsed_time_ms);
3988
3989   if (has_aborted()) {
3990     // The task was aborted for some reason.
3991
3992     statsOnly( ++_aborted );
3993
3994     if (_has_aborted_timed_out) {
3995       double diff_ms = elapsed_time_ms - _time_target_ms;
3996       // Keep statistics of how well we did with respect to hitting
3997       // our target only if we actually timed out (if we aborted for
3998       // other reasons, then the results might get skewed).
3999       _marking_step_diffs_ms.add(diff_ms);
4000     }
4001
4002     if (_cm->has_overflown()) {
4003       // This is the interesting one. We aborted because a global
4004       // overflow was raised. This means we have to restart the
4005       // marking phase and start iterating over regions. However, in
4006       // order to do this we have to make sure that all tasks stop
4007       // what they are doing and re-initialise in a safe manner. We
4008       // will achieve this with the use of two barrier sync points.
4009
4010       if (_cm->verbose_low())
4011         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4012
4013       _cm->enter_first_sync_barrier(_task_id);
4014       // When we exit this sync barrier we know that all tasks have
4015       // stopped doing marking work. So, it's now safe to
4016       // re-initialise our data structures. At the end of this method,
4017       // task 0 will clear the global data structures.
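// [Illustrative sketch, disabled; not HotSpot code] The two sync
// barriers used here implement a common reset pattern: the first
// guarantees every task has stopped touching the shared marking
// structures, each task then resets its local state (task 0 also
// resets the global state), and the second guarantees all resets have
// finished before anyone restarts. A counting barrier sketched in
// standard C++ (names invented; no std::barrier in this vintage):
#if 0
#include <condition_variable>
#include <mutex>

struct BarrierModel {
  std::mutex              _m;
  std::condition_variable _cv;
  int                     _waiting;
  int                     _generation;
  const int               _n;

  explicit BarrierModel(int n) : _waiting(0), _generation(0), _n(n) {}

  void sync() {
    std::unique_lock<std::mutex> l(_m);
    int gen = _generation;
    if (++_waiting == _n) {
      _waiting = 0;
      ++_generation;        // last arrival releases everyone
      _cv.notify_all();
    } else {
      _cv.wait(l, [&] { return gen != _generation; });
    }
  }
};

// Per-task overflow recovery, mirroring the code around this point:
//   first_barrier.sync();    // all tasks have stopped marking
//   clear_local_state();     // task 0 additionally clears global state
//   second_barrier.sync();   // all state is reset; safe to restart
#endif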
4018
4019       statsOnly( ++_aborted_overflow );
4020
4021       // We clear the local state of this task...
4022       clear_region_fields();
4023
4024       // ...and enter the second barrier.
4025       _cm->enter_second_sync_barrier(_task_id);
4026       // At this point everything has been re-initialised and we're
4027       // ready to restart.
4028     }
4029
4030     if (_cm->verbose_low()) {
4031       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4032                              "elapsed = %1.2lfms <<<<<<<<<<",
4033                              _task_id, _time_target_ms, elapsed_time_ms);
4034       if (_cm->has_aborted())
4035         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4036                                _task_id);
4037     }
4038   } else {
4039     if (_cm->verbose_low())
4040       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4041                              "elapsed = %1.2lfms <<<<<<<<<<",
4042                              _task_id, _time_target_ms, elapsed_time_ms);
4043   }
4044
4045   _claimed = false;
4046 }
4047
4048 CMTask::CMTask(int task_id,
4049                ConcurrentMark* cm,
4050                CMTaskQueue* task_queue,
4051                CMTaskQueueSet* task_queues)
4052   : _g1h(G1CollectedHeap::heap()),
4053     _task_id(task_id), _cm(cm),
4054     _claimed(false),
4055     _nextMarkBitMap(NULL), _hash_seed(17),
4056     _task_queue(task_queue),
4057     _task_queues(task_queues),
4058     _oop_closure(NULL) {
4059   guarantee(task_queue != NULL, "invariant");
4060   guarantee(task_queues != NULL, "invariant");
4061
4062   statsOnly( _clock_due_to_scanning = 0;
4063              _clock_due_to_marking  = 0 );
4064
4065   _marking_step_diffs_ms.add(0.5);
4066 }
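// [Illustrative sketch, disabled; not HotSpot code] The block comment
// above do_marking_step() says the method must be called in a loop (or,
// otherwise, regularly) until it completes without aborting. A caller
// therefore looks roughly like this hypothetical driver (the real
// callers are the concurrent marking and remark paths):
#if 0
static void drive_marking(CMTask* task, double target_ms) {
  do {
    task->do_marking_step(target_ms);
    if (task->has_aborted()) {
      // Honour the reason for the abort before retrying: yield for a
      // pending safepoint, join the overflow barriers, etc.
    }
  } while (task->has_aborted());
}
#endif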