/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL, 0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
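// Illustrative sketch (not from the original sources) of the
// address <-> bit-offset arithmetic behind heapWordToOffset() and
// offsetToHeapWord() used below, where each bit covers 2^_shifter
// heap words. Kept under #if 0, following this file's convention for
// non-compiled reference code.
#if 0
size_t heapWordToOffset_sketch(HeapWord* addr,
                               HeapWord* bmStartWord,
                               int shifter) {
  // distance in HeapWords from the bottom of the covered space,
  // scaled down so that one bit spans 2^shifter words
  return (size_t)(addr - bmStartWord) >> shifter;
}

HeapWord* offsetToHeapWord_sketch(size_t offset,
                                  HeapWord* bmStartWord,
                                  int shifter) {
  return bmStartWord + (offset << shifter);
}
#endif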
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}
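// Hypothetical driver (illustrative only, not HotSpot code) for
// getAndClearMarkedRegion() defined below: it carves the marked runs
// out of [addr, end_addr) one at a time until an empty region comes
// back.
#if 0
void drain_marked_runs_sketch(CMBitMap* bm,
                              HeapWord* addr,
                              HeapWord* end_addr) {
  while (true) {
    MemRegion mr = bm->getAndClearMarkedRegion(addr, end_addr);
    if (mr.is_empty()) {
      break;             // no marked words left in the range
    }
    // ... process the live run [mr.start(), mr.end()) ...
    addr = mr.end();     // resume the scan just after the run
  }
}
#endif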
MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}


CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base);
  }
}
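// Standalone sketch (illustrative, using std::atomic rather than the
// Atomic:: wrappers) of the claim-then-publish protocol that
// CMMarkStack::par_push above and CMRegionStack::push_lock_free below
// both follow: CAS the index forward to claim a slot, then write the
// element into the claimed slot. Concurrent pops and the _overflow
// flag are omitted for brevity.
#if 0
#include <atomic>

template <typename T, int N>
struct LockFreePushStackSketch {
  std::atomic<int> _index;
  T                _base[N];

  bool par_push(const T& e) {
    while (true) {
      int index = _index.load();
      if (index >= N) {
        return false;                      // stack is full
      }
      int next_index = index + 1;
      // plays the role of Atomic::cmpxchg(next_index, &_index, index)
      if (_index.compare_exchange_weak(index, next_index)) {
        _base[index] = e;                  // the slot is ours; publish
        return true;
      }
      // lost the race to another pusher; try again
    }
  }
};
#endif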
void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                  (int) _parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}
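// Worked example (illustrative flag values, not defaults) of the
// sizing math above: with G1MarkingOverheadPercent = 10,
// MaxGCPauseMillis = 200, GCPauseIntervalMillis = 1000 and an
// 8-processor machine,
//   marking_overhead      = 10 / 100.0         = 0.10
//   overall_cm_overhead   = 200 * 0.10 / 1000  = 0.02
//   cpu_ratio             = 1.0 / 8            = 0.125
//   marking_thread_num    = ceil(0.02 / 0.125) = 1
//   marking_task_overhead = 0.02 / 1 * 8       = 0.16
//   sleep_factor          = (1 - 0.16) / 0.16  = 5.25
// i.e. one marking thread that sleeps about 5.25x as long as it just
// ran, keeping it near its 16% target load.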
void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, then we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};
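// The chunked clearing in clearNextBitmap() above is a general
// pattern: bound the work done between safepoint checks by processing
// a large range in fixed-size chunks. A minimal standalone sketch
// (illustrative only):
#if 0
void clear_in_chunks_sketch(CMBitMap* bm,
                            HeapWord* start,
                            HeapWord* end,
                            size_t chunk_words) {
  HeapWord* cur = start;
  while (cur < end) {
    HeapWord* next = cur + chunk_words;
    if (next > end) {
      next = end;
    }
    bm->clearRange(MemRegion(cur, next));
    cur = next;
    // offer to yield between chunks so a pending safepoint is never
    // delayed by the full clear
  }
}
#endif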
void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  assert(!rp->discovery_enabled(), "Precondition");
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}
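// Illustrative summary of the SATB activation handshake used above and
// at the end of marking in checkpointRootsFinal(): each thread's SATB
// queue carries an "active" flag that must agree with the global
// marking phase, so the flip asserts the expected old value while
// installing the new one.
//   start of marking: set_active_all_threads(true,  /* new      */
//                                            false  /* expected */);
//   end of marking:   set_active_all_threads(false, /* new      */
//                                            true   /* expected */);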
// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark  hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,    // activate StrongRootsScope
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,     // do not visit active blobs
                            &older    // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr)) {
    _nextMarkBitMap->parMark(addr);
  }
}
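// Illustrative picture (not code) of the finger discipline used by
// grayRegionIfNecessary() and markAndGrayObjectIfNecessary() below:
//
//   _heap_start                _finger                  _heap_end
//        |---- already scanned ----|---- not yet scanned ----|
//
// An object or region marked below the finger has already been passed
// by the scan, so it must be pushed on a stack to be revisited; one at
// or above the finger will be visited when the finger sweeps over it.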
void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects) {
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;
  }

  HeapWord* finger = _finger;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);
  }

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low()) {
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());
    }

    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low()) {
        gclog_or_tty->print_cr("[global] region stack has overflown.");
      }
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects) {
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;
      }

      // No OrderAccess:store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low()) {
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
          }
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", 1166 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); 1167 #endif 1168 } while (!_cm->has_aborted() && the_task->has_aborted()); 1169 } 1170 the_task->record_end_time(); 1171 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 1172 1173 ConcurrentGCThread::stsLeave(); 1174 1175 double end_vtime = os::elapsedVTime(); 1176 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime); 1177 } 1178 1179 CMConcurrentMarkingTask(ConcurrentMark* cm, 1180 ConcurrentMarkThread* cmt) : 1181 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 1182 1183 ~CMConcurrentMarkingTask() { } 1184 }; 1185 1186 void ConcurrentMark::markFromRoots() { 1187 // we might be tempted to assert that: 1188 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1189 // "inconsistent argument?"); 1190 // However that wouldn't be right, because it's possible that 1191 // a safepoint is indeed in progress as a younger generation 1192 // stop-the-world GC happens even as we mark in this generation. 1193 1194 _restart_for_overflow = false; 1195 1196 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads()); 1197 force_overflow_conc()->init(); 1198 set_phase(active_workers, true /* concurrent */); 1199 1200 CMConcurrentMarkingTask markingTask(this, cmThread()); 1201 if (parallel_marking_threads() > 0) { 1202 _parallel_workers->run_task(&markingTask); 1203 } else { 1204 markingTask.work(0); 1205 } 1206 print_stats(); 1207 } 1208 1209 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1210 // world is stopped at this checkpoint 1211 assert(SafepointSynchronize::is_at_safepoint(), 1212 "world should be stopped"); 1213 1214 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1215 1216 // If a full collection has happened, we shouldn't do this. 1217 if (has_aborted()) { 1218 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1219 return; 1220 } 1221 1222 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1223 1224 if (VerifyDuringGC) { 1225 HandleMark hm; // handle scope 1226 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1227 Universe::heap()->prepare_for_verify(); 1228 Universe::verify(/* allow dirty */ true, 1229 /* silent */ false, 1230 /* option */ VerifyOption_G1UsePrevMarking); 1231 } 1232 1233 G1CollectorPolicy* g1p = g1h->g1_policy(); 1234 g1p->record_concurrent_mark_remark_start(); 1235 1236 double start = os::elapsedTime(); 1237 1238 checkpointRootsFinalWork(); 1239 1240 double mark_work_end = os::elapsedTime(); 1241 1242 weakRefsWork(clear_all_soft_refs); 1243 1244 if (has_overflown()) { 1245 // Oops. We overflowed. Restart concurrent marking. 1246 _restart_for_overflow = true; 1247 // Clear the flag. We do not need it any more. 1248 clear_has_overflown(); 1249 if (G1TraceMarkStackOverflow) { 1250 gclog_or_tty->print_cr("\nRemark led to restart for overflow."); 1251 } 1252 } else { 1253 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1254 // We're done with marking. 1255 // This is the end of the marking cycle, we're expected all 1256 // threads to have SATB queues with active set to true. 
void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
  force_overflow_conc()->init();
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* allow dirty */ true,
                       /* silent      */ false,
                       /* option      */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0
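// Worked example (illustrative addresses) of the "card num" arithmetic
// used by CalcLiveObjectsClosure below, assuming the usual 512-byte
// cards (CardTableModRefBS::card_shift == 9):
//   heap bottom      = 0xf0000000
//   _bottom_card_num = 0xf0000000 >> 9 = 0x780000
//   an object starting at 0xf0000a00 has
//   card num         = 0xf0000a00 >> 9 = 0x780005
//   card table index = 0x780005 - 0x780000 = 5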
class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm),_final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) break;
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0) {
      _start_vtime_sec = os::elapsedVTime();
    }

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0.  One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded.  It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set.  We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1) {
      mark_card_num_range(start_card_num, last_card_num);
    }
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};


void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}
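// The do/while above re-runs the counting pass until one completes
// without accounting for any new live words (calccl.changed() stays
// false). Marking is still running concurrently and a pass can yield
// and abandon regions, so a single pass is not guaranteed to see a
// stable picture; iterating until a quiet pass is what makes the
// counts usable.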
class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"), _g1h(g1h),
      _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
    if (ParallelGCThreads > 0) {
      _n_workers = _g1h->workers()->total_workers();
    } else {
      _n_workers = 1;
    }
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task);
  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(int i) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           int worker_num,
                           FreeRegionList* local_cleanup_list,
                           HumongousRegionSet* humongous_proxy_set,
                           HRRSCleanupTask* hrrs_cleanup_task)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  hr->reset_gc_time_stamp();
  if (!hr->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    hr->note_end_of_marking();
    _max_live_bytes += hr->max_live_bytes();
    _g1->free_region_if_empty(hr,
                              &_freed_bytes,
                              _local_cleanup_list,
                              _humongous_proxy_set,
                              _hrrs_cleanup_task,
                              true /* par */);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) {
      _max_region_time = region_time;
    }
  }
  return false;
}

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                        &_region_bm, &_card_bm);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");

    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_count_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    g1_par_count_task.work(0);
  }

  size_t known_garbage_bytes =
    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
#if 0
  gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
                         (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
                         (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
                         (double) known_garbage_bytes / (double) (1024 * 1024));
#endif // 0
  g1p->set_known_garbage_bytes(known_garbage_bytes);

  size_t start_used_bytes = g1h->used();
  _at_least_one_mark_complete = true;
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("Cleanup:");
    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
                           this_final_counting_time*1000.0);
  }
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
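  // (G1ParNoteEndTask, defined above, has each worker claim regions,
  // call note_end_of_marking() on them, and hand any completely empty
  // ones to free_region_if_empty() via a local cleanup list.)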
  double note_end_start = os::elapsedTime();
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }
  double note_end_end = os::elapsedTime();
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
                           (note_end_end - note_end_start)*1000.0);
  }

  // We scrub the remembered sets before the cleanup-end
  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      int n_workers = g1h->workers()->total_workers();
      g1h->set_par_threads(n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                            HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end(
                        g1_par_note_end_task.freed_bytes(),
                        g1_par_note_end_task.max_live_bytes());

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // G1CollectedHeap::heap()->print();
  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  //                        G1CollectedHeap::heap()->get_gc_time_stamp());

  if (PrintGC || PrintGCDetails) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  g1h->verify_region_sets_optional();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has "SIZE_FORMAT" entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending "SIZE_FORMAT" entries to the "
                               "secondary_free_list, cleanup list still has "
                               SIZE_FORMAT" entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Support closures for reference processing in G1

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

class G1CMKeepAliveClosure: public OopClosure {
  G1CollectedHeap* _g1;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitMap;
public:
  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
                       CMBitMap* bitMap) :
    _g1(g1), _cm(cm),
    _bitMap(bitMap) {}

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    HeapWord* addr = (HeapWord*)obj;

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("\t[0] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
                             p, (void*) obj);
    }

    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
      _bitMap->mark(addr);
      _cm->mark_stack_push(obj);
    }
  }
};

class G1CMDrainMarkingStackClosure: public VoidClosure {
  CMMarkStack*          _markStack;
  CMBitMap*             _bitMap;
  G1CMKeepAliveClosure* _oopClosure;
public:
  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
                               G1CMKeepAliveClosure* oopClosure) :
    _markStack(markStack),
    _bitMap(bitMap),
    _oopClosure(oopClosure)
  {}

  void do_void() {
    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  }
};

// 'Keep Alive' closure used by parallel reference processing.
// An instance of this closure is used in the parallel reference processing
// code rather than an instance of G1CMKeepAliveClosure. We could have used
// the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
// only placed on to discovered ref lists once, so we can mark and push with
// no need to check whether the object has already been marked. Using the
// G1CMKeepAliveClosure would mean, however, having all the worker threads
// operating on the global mark stack. This means that an individual
// worker would be doing lock-free pushes while it processes its own
// discovered ref list followed by a drain call. If the discovered ref lists
// are unbalanced then this could cause interference with the other
// workers. Using a CMTask (and its embedded local data structures)
// avoids that potential interference.
class G1CMParKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark*  _cm;
  CMTask*          _task;
  CMBitMap*        _bitMap;
  int              _ref_counter_limit;
  int              _ref_counter;
public:
  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
                                  CMTask* task,
                                  CMBitMap* bitMap) :
    _cm(cm), _task(task), _bitMap(bitMap),
    _ref_counter_limit(G1RefProcDrainInterval)
  {
    assert(_ref_counter_limit > 0, "sanity");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->task_id(), p, (void*) obj);
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call do_marking_step() to process
        // these entries. We call the routine in a loop, which we'll exit
        // if there's nothing more to do (i.e. we're done with the entries
        // that we've pushed as a result of the deal_with_reference calls
        // above) or we overflow.
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment
        // at the beginning of CMTask::do_marking_step() for those
        // conditions - one of which is reaching the specified time target.)
        // It is only when CMTask::do_marking_step() returns without setting
        // the has_aborted() flag that the marking has completed.
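        // In short: retry do_marking_step() until either it completes
        // without aborting, or the global mark stack overflows.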
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_stealing    */,
                                 false /* do_termination */);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
      }
    }
  }
};

class G1CMParDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask*         _task;
public:
  G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
    _cm(cm), _task(task)
  {}

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step",
                               _task->task_id());
      }

      // We call CMTask::do_marking_step() to completely drain the local and
      // global marking stacks. The routine is called in a loop, which we'll
      // exit if there's nothing more to do (i.e. we've completely drained
      // the entries that were pushed as a result of applying the
      // G1CMParKeepAliveAndDrainClosure to the entries on the discovered
      // ref lists above) or we overflow the global marking stack.
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment
      // at the beginning of CMTask::do_marking_step() for those
      // conditions - one of which is reaching the specified time target.)
      // It is only when CMTask::do_marking_step() returns without setting
      // the has_aborted() flag that the marking has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_stealing    */,
                             true /* do_termination */);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitmap;
  WorkGang*        _workers;
  int              _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          CMBitMap* bitmap,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm), _bitmap(bitmap),
    _workers(workers), _active_workers(n_workers)
  { }

  // Executes the given task using concurrent marking worker threads.
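  // Rough usage sketch (the actual driver is the reference processor;
  // the call shown is illustrative only):
  //
  //   G1CMRefProcTaskExecutor exec(g1h, cm, bitmap, workers, n);
  //   rp->process_discovered_references(..., &exec);  // invokes execute()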
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&     _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  CMBitMap*        _bitmap;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm,
                       CMBitMap* bitmap) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
  {}

  virtual void work(int i) {
    CMTask* marking_task = _cm->task(i);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
    G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);

    _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);

  // We need to reset the phase for each task execution so that
  // the termination protocol of CMTask::do_marking_step works.
  _cm->set_phase(_active_workers, false /* concurrent */);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task)
  { }

  virtual void work(int i) {
    _enq_task.work(i);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  ReferenceProcessor* rp = g1h->ref_processor_cm();

  // See the comment in G1CollectedHeap::ref_processing_init()
  // about how reference processing currently works in G1.

  // Process weak references.
  rp->setup_policy(clear_all_soft_refs);
  assert(_markStack.isEmpty(), "mark stack should be empty");

  G1CMIsAliveClosure   g1_is_alive(g1h);
  G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
  G1CMDrainMarkingStackClosure
    g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
  // We use the work gang from the G1CollectedHeap and we utilize all
  // the worker threads.
  int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
  active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);

  G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
                                            g1h->workers(), active_workers);

  if (rp->processing_is_mt()) {
    // Set the degree of MT here. If the discovery is done MT, there
    // may have been a different number of threads doing the discovery
    // and a different number of discovered lists may have Ref objects.
    // That is OK as long as the Reference lists are balanced (see
    // balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);

    rp->process_discovered_references(&g1_is_alive,
                                      &g1_keep_alive,
                                      &g1_drain_mark_stack,
                                      &par_task_executor);

    // The work routines of the parallel keep_alive and drain_marking_stack
    // will set the has_overflown flag if we overflow the global marking
    // stack.
  } else {
    rp->process_discovered_references(&g1_is_alive,
                                      &g1_keep_alive,
                                      &g1_drain_mark_stack,
                                      NULL);
  }

  assert(_markStack.overflow() || _markStack.isEmpty(),
         "mark stack should be empty (unless it overflowed)");
  if (_markStack.overflow()) {
    // Should have been done already when we tried to push an
    // entry on to the global mark stack. But let's do it again.
    set_has_overflown();
  }

  if (rp->processing_is_mt()) {
    assert(rp->num_q() == active_workers, "why not");
    rp->enqueue_discovered_references(&par_task_executor);
  } else {
    rp->enqueue_discovered_references();
  }

  rp->verify_no_references_recorded();
  assert(!rp->discovery_enabled(), "Post condition");

  // Now clean up stale oops in StringTable
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in symbol table.
  SymbolTable::unlink();
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark *_cm;

public:
  void work(int worker_i) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if ((size_t)worker_i < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_i);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_stealing    */,
                              true /* do_termination */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm) :
    AbstractGangTask("Par Remark"), _cm(cm) { }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = ParallelGCThreads;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
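    // (Workers with an index >= active_tasks() return immediately from
    // CMRemarkTask::work() above, which is what makes starting the
    // whole gang safe.)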
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = 1;
    set_phase(active_workers, false /* concurrent */);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}

#ifndef PRODUCT

class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption  vo,
                           bool          all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = false;
      bool marked = false;

      switch (_vo) {
        case VerifyOption_G1UsePrevMarking:
          over_tams = hr->obj_allocated_since_prev_marking(obj);
          marked = _g1h->isMarkedPrev(obj);
          break;
        case VerifyOption_G1UseNextMarking:
          over_tams = hr->obj_allocated_since_next_marking(obj);
          marked = _g1h->isMarkedNext(obj);
          break;
        case VerifyOption_G1UseMarkWord:
          marked = obj->is_gc_marked();
          break;
        default:
          ShouldNotReachHere();
      }

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

class PrintReachableObjectClosure : public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;
  HeapRegion*      _hr;

public:
  PrintReachableObjectClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all,
                              HeapRegion*   hr) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams = false;
    bool marked = false;

    switch (_vo) {
      case VerifyOption_G1UsePrevMarking:
        over_tams = _hr->obj_allocated_since_prev_marking(o);
        marked = _g1h->isMarkedPrev(o);
        break;
      case VerifyOption_G1UseNextMarking:
        over_tams = _hr->obj_allocated_since_next_marking(o);
        marked = _g1h->isMarkedNext(o);
        break;
      case VerifyOption_G1UseMarkWord:
        marked = o->is_gc_marked();
        break;
      default:
        ShouldNotReachHere();
    }
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ? " M" : "");
      PrintReachableOopClosure oopCl(_out, _vo, _all);
      o->oop_iterate(&oopCl);
    }
  }
};

class PrintReachableRegionClosure : public HeapRegionClosure {
private:
  outputStream* _out;
  VerifyOption  _vo;
  bool          _all;

public:
  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* b = hr->bottom();
    HeapWord* e = hr->end();
    HeapWord* t = hr->top();
    HeapWord* p = NULL;

    switch (_vo) {
      case VerifyOption_G1UsePrevMarking:
        p = hr->prev_top_at_mark_start();
        break;
      case VerifyOption_G1UseNextMarking:
        p = hr->next_top_at_mark_start();
        break;
      case VerifyOption_G1UseMarkWord:
        // When we are verifying marking using the mark word
        // TAMS has no relevance.
        assert(p == NULL, "post-condition");
        break;
      default:
        ShouldNotReachHere();
    }
    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
                   "TAMS: "PTR_FORMAT, b, e, t, p);
    _out->cr();

    HeapWord* from = b;
    HeapWord* to   = t;

    if (to > from) {
      _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
      _out->cr();
      PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
      hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
      _out->cr();
    }

    return false;
  }

  PrintReachableRegionClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all) :
    _out(out), _vo(vo), _all(all) { }
};

static const char* verify_option_to_tams(VerifyOption vo) {
  switch (vo) {
    case VerifyOption_G1UsePrevMarking:
      return "PTAMS";
    case VerifyOption_G1UseNextMarking:
      return "NTAMS";
    default:
      return "NONE";
  }
}

void ConcurrentMark::print_reachable(const char* str,
                                     VerifyOption vo,
                                     bool all) {
  gclog_or_tty->cr();
  gclog_or_tty->print_cr("== Doing heap dump... ");

  if (G1PrintReachableBaseFile == NULL) {
    gclog_or_tty->print_cr("  #### error: no base file defined");
    return;
  }

  if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
      (JVM_MAXPATHLEN - 1)) {
    gclog_or_tty->print_cr("  #### error: file name too long");
    return;
  }

  char file_name[JVM_MAXPATHLEN];
  sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  gclog_or_tty->print_cr("  dumping to file %s", file_name);

  fileStream fout(file_name);
  if (!fout.is_open()) {
    gclog_or_tty->print_cr("  #### error: could not open file");
    return;
  }

  outputStream* out = &fout;
  out->print_cr("-- USING %s", verify_option_to_tams(vo));
  out->cr();

  out->print_cr("--- ITERATING OVER REGIONS");
  out->cr();
  PrintReachableRegionClosure rcl(out, vo, all);
  _g1h->heap_region_iterate(&rcl);
  out->cr();

  gclog_or_tty->print_cr("  done");
  gclog_or_tty->flush();
}

#endif // PRODUCT

// This note is for drainAllSATBBuffers and the code in between.
// In the future we could reuse a task to do this work during an
// evacuation pause (since now tasks are not active and can be claimed
// during an evacuation pause). This was a late change to the code and
// is currently not being taken advantage of.

class CMGlobalObjectClosure : public ObjectClosure {
private:
  ConcurrentMark* _cm;

public:
  void do_object(oop obj) {
    _cm->deal_with_reference(obj);
  }

  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
};

void ConcurrentMark::deal_with_reference(oop obj) {
  if (verbose_high()) {
    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
                           (void*) obj);
  }

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "null check is implicit");
    if (!_nextMarkBitMap->isMarked(objAddr)) {
      // Only get the containing region if the object is not marked on the
      // bitmap (otherwise, it's a waste of time since we won't do
      // anything with it).
      HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
      if (!hr->obj_allocated_since_next_marking(obj)) {
        if (verbose_high()) {
          gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
                                 "marked", (void*) obj);
        }

        // we need to mark it first
        if (_nextMarkBitMap->parMark(objAddr)) {
          // No OrderAccess::store_load() is needed. It is implicit in the
          // CAS done in parMark(objAddr) above
          HeapWord* finger = _finger;
          if (objAddr < finger) {
            if (verbose_high()) {
              gclog_or_tty->print_cr("[global] below the global finger "
                                     "("PTR_FORMAT"), pushing it", finger);
            }
            if (!mark_stack_push(obj)) {
              if (verbose_low()) {
                gclog_or_tty->print_cr("[global] global stack overflow during "
                                       "deal_with_reference");
              }
            }
          }
        }
      }
    }
  }
}

void ConcurrentMark::drainAllSATBBuffers() {
  CMGlobalObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_closure(&oc);

  while (satb_mq_set.apply_closure_to_completed_buffer()) {
    if (verbose_medium()) {
      gclog_or_tty->print_cr("[global] processed an SATB buffer");
    }
  }

  // no need to check whether we should do this, as this is only
  // called during an evacuation pause
  satb_mq_set.iterate_closure_all_threads();

  satb_mq_set.set_closure(NULL);
  assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
}

void ConcurrentMark::markPrev(oop p) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
}

void ConcurrentMark::clear(oop p) {
  assert(p != NULL && p->is_oop(), "expected an oop");
  HeapWord* addr = (HeapWord*)p;
  assert(addr >= _nextMarkBitMap->startWord() &&
         addr < _nextMarkBitMap->endWord(), "in a region");

  _nextMarkBitMap->clear(addr);
}

void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  _nextMarkBitMap->clearRange(mr);
}

HeapRegion*
ConcurrentMark::claim_region(int task_num) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // Note on how this code handles humongous regions. In the
    // normal case the finger will reach the start of a "starts
    // humongous" (SH) region. Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord*   bottom      = curr_region->bottom();
    HeapWord*   end         = curr_region->end();
    HeapWord*   limit       = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             task_num, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%d] we were successful with region = "
                               PTR_FORMAT, task_num, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
                                 "returning it ", task_num, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                 "returning NULL", task_num, curr_region);
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
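        // (Callers typically retry claim_region() in a loop until they
        // either get a non-NULL region or the global finger reaches
        // _heap_end.)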
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low()) {
        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               task_num, _finger, finger);
      }

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
  bool result = false;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = _g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        the_task->set_aborted_region(MemRegion());
        result = true;
      }
    }
  }
  return result;
}

bool ConcurrentMark::has_aborted_regions() {
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* the_task = _tasks[i];
    MemRegion mr = the_task->aborted_region();
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return true;
    }
  }
  return false;
}

void ConcurrentMark::oops_do(OopClosure* cl) {
  if (_markStack.size() > 0 && verbose_low()) {
    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
                           "size = %d", _markStack.size());
  }
  // we first iterate over the contents of the mark stack...
  _markStack.oops_do(cl);

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue((int)i);

    if (queue->size() > 0 && verbose_low()) {
      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
                             "size = %d", i, queue->size());
    }

    // ...then over the contents of all the task queues.
    queue->oops_do(cl);
  }

  // Invalidate any entries in the region stack that point into the
  // collection set
  if (_regionStack.invalidate_entries_into_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }

  // Invalidate any aborted regions, recorded in the individual CM
  // tasks, that point into the collection set.
  if (invalidate_aborted_regions_in_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }
}

void ConcurrentMark::clear_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  _regionStack.setEmpty();
  _regionStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
    // Clear any partial regions from the CMTasks
    _tasks[i]->clear_aborted_region();
  }
}

void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

class CSMarkOopClosure: public OopClosure {
  friend class CSMarkBitMapClosure;

  G1CollectedHeap* _g1h;
  CMBitMap*        _bm;
  ConcurrentMark*  _cm;
  oop*             _ms;
  jint*            _array_ind_stack;
  int              _ms_size;
  int              _ms_ind;
  int              _array_increment;

  bool push(oop obj, int arr_ind = 0) {
    if (_ms_ind == _ms_size) {
      gclog_or_tty->print_cr("Mark stack is full.");
      return false;
    }
    _ms[_ms_ind] = obj;
    if (obj->is_objArray()) {
      _array_ind_stack[_ms_ind] = arr_ind;
    }
    _ms_ind++;
    return true;
  }

  oop pop() {
    if (_ms_ind == 0) {
      return NULL;
    } else {
      _ms_ind--;
      return _ms[_ms_ind];
    }
  }

  template <class T> bool drain() {
    while (_ms_ind > 0) {
      oop obj = pop();
      assert(obj != NULL, "Since index was non-zero.");
      if (obj->is_objArray()) {
        jint arr_ind = _array_ind_stack[_ms_ind];
        objArrayOop aobj = objArrayOop(obj);
        jint len = aobj->length();
        jint next_arr_ind = arr_ind + _array_increment;
        if (next_arr_ind < len) {
          push(obj, next_arr_ind);
        }
        // Now process this portion of this one.
        int lim = MIN2(next_arr_ind, len);
        for (int j = arr_ind; j < lim; j++) {
          do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
        }
      } else {
        obj->oop_iterate(this);
      }
      if (abort()) return false;
    }
    return true;
  }

public:
  CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
    _g1h(G1CollectedHeap::heap()),
    _cm(cm),
    _bm(cm->nextMarkBitMap()),
    _ms_size(ms_size), _ms_ind(0),
    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
    _array_increment(MAX2(ms_size/8, 16))
  {}

  ~CSMarkOopClosure() {
    FREE_C_HEAP_ARRAY(oop, _ms);
    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (oopDesc::is_null(heap_oop)) return;
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (obj->is_forwarded()) {
      // If the object has already been forwarded, we have to make sure
      // that it's marked. So follow the forwarding pointer. Note that
      // this does the right thing for self-forwarding pointers in the
      // evacuation failure case.
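      // (A self-forwarded object - the evacuation-failure case - has a
      // forwardee that points back at itself, so following it below is
      // then a harmless no-op.)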
      obj = obj->forwardee();
    }
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (hr != NULL) {
      if (hr->in_collection_set()) {
        if (_g1h->is_obj_ill(obj)) {
          _bm->mark((HeapWord*)obj);
          if (!push(obj)) {
            gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
            set_abort();
          }
        }
      } else {
        // Outside the collection set; we need to gray it
        _cm->deal_with_reference(obj);
      }
    }
  }
};

class CSMarkBitMapClosure: public BitMapClosure {
  G1CollectedHeap* _g1h;
  CMBitMap*        _bitMap;
  ConcurrentMark*  _cm;
  CSMarkOopClosure _oop_cl;
public:
  CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
    _g1h(G1CollectedHeap::heap()),
    _bitMap(cm->nextMarkBitMap()),
    _oop_cl(cm, ms_size)
  {}

  ~CSMarkBitMapClosure() {}

  bool do_bit(size_t offset) {
    // convert offset into a HeapWord*
    HeapWord* addr = _bitMap->offsetToHeapWord(offset);
    assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
           "address out of range");
    assert(_bitMap->isMarked(addr), "tautology");
    oop obj = oop(addr);
    if (!obj->is_forwarded()) {
      if (!_oop_cl.push(obj)) return false;
      if (UseCompressedOops) {
        if (!_oop_cl.drain<narrowOop>()) return false;
      } else {
        if (!_oop_cl.drain<oop>()) return false;
      }
    }
    // Otherwise...
    return true;
  }
};


class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  CMBitMap* _bm;
  CSMarkBitMapClosure _bit_cl;
  enum SomePrivateConstants {
    MSSize = 1000
  };
  bool _completed;
public:
  CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
    _bm(cm->nextMarkBitMap()),
    _bit_cl(cm, MSSize),
    _completed(true)
  {}

  ~CompleteMarkingInCSHRClosure() {}

  bool doHeapRegion(HeapRegion* r) {
    if (!r->evacuation_failed()) {
      MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
      if (!mr.is_empty()) {
        if (!_bm->iterate(&_bit_cl, mr)) {
          _completed = false;
          return true;
        }
      }
    }
    return false;
  }

  bool completed() { return _completed; }
};

class ClearMarksInHRClosure: public HeapRegionClosure {
  CMBitMap* _bm;
public:
  ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }

  bool doHeapRegion(HeapRegion* r) {
    if (!r->used_region().is_empty() && !r->evacuation_failed()) {
      MemRegion usedMR = r->used_region();
      _bm->clearRange(usedMR);
    }
    return false;
  }
};

void ConcurrentMark::complete_marking_in_collection_set() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  if (!g1h->mark_in_progress()) {
    g1h->g1_policy()->record_mark_closure_time(0.0);
    return;
  }

  int i = 1;
  double start = os::elapsedTime();
  while (true) {
    i++;
    CompleteMarkingInCSHRClosure cmplt(this);
    g1h->collection_set_iterate(&cmplt);
    if (cmplt.completed()) break;
  }
  double end_time = os::elapsedTime();
  double elapsed_time_ms = (end_time - start) * 1000.0;
  g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);

  ClearMarksInHRClosure clr(nextMarkBitMap());
  g1h->collection_set_iterate(&clr);
}

// The next two methods deal with the following optimisation. Some
// objects are gray by being marked and located above the finger. If
// they are copied, during an evacuation pause, below the finger then
// they need to be pushed on the stack. The observation is that, if
// there are no regions in the collection set located above the
// finger, then the above cannot happen, hence we do not need to
// explicitly gray any objects when copying them to below the
// finger. The global stack will be scanned to ensure that, if it
// points to objects being copied, it will update their
// location. There is a tricky situation with the gray objects in
// region stack that are being copied, however. See the comment in
// newCSet().

void ConcurrentMark::newCSet() {
  if (!concurrent_marking_in_progress()) {
    // nothing to do if marking is not in progress
    return;
  }

  // find what the lowest finger is among the global and local fingers
  _min_finger = _finger;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _min_finger) {
      _min_finger = task_finger;
    }
  }

  _should_gray_objects = false;

  // This fixes a very subtle and frustrating bug. It might be the case
  // that, during an evacuation pause, heap regions that contain
  // objects that are gray (by being in regions contained in the
  // region stack) are included in the collection set. Since such gray
  // objects will be moved, and because it's not easy to redirect
  // region stack entries to point to a new location (because objects
  // in one region might be scattered to multiple regions after they
  // are copied), one option is to ensure that all marked objects
  // copied during a pause are pushed on the stack. Notice, however,
  // that this problem can only happen when the region stack is not
  // empty during an evacuation pause. So, we make the fix a bit less
  // conservative and ensure that regions are pushed on the stack,
  // irrespective of whether all collection set regions are below the
  // finger, if the region stack is not empty. This is expected to be
  // a rare case, so I don't think it's necessary to be smarter about it.
  if (!region_stack_empty() || has_aborted_regions()) {
    _should_gray_objects = true;
  }
}

void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  if (!concurrent_marking_in_progress()) return;

  HeapWord* region_end = hr->end();
  if (region_end > _min_finger) {
    _should_gray_objects = true;
  }
}

// Resets the region fields of active CMTasks whose values point
// into the collection set.
void ConcurrentMark::reset_active_task_region_fields_in_cset() {
  assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
  assert(parallel_marking_threads() <= _max_task_num, "sanity");

  for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL) {
      assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
      HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
      if (finger_region->in_collection_set()) {
        // The task's current region is in the collection set.
        // This region will be evacuated in the current GC and
        // the region fields in the task will be stale.
        task->giveup_current_region();
      }
    }
  }
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Empty mark stack
  clear_marking_state();
  for (int i = 0; i < (int)_max_task_num; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);
}

static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                           : 0.0));
  }
  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
                         "(%8.2f s marking, %8.2f s counting).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum(),
                         cmThread()->vtime_count_accum());
}

void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}

// Closures
// XXX: there seems to be a lot of code duplication here;
// should refactor and consolidate the shared code.

// We take a break if someone is trying to stop the world.
bool ConcurrentMark::do_yield_check(int worker_i) {
  if (should_yield()) {
    if (worker_i == 0) {
      _g1h->g1_policy()->record_concurrent_pause();
    }
    cmThread()->yield();
    if (worker_i == 0) {
      _g1h->g1_policy()->record_concurrent_pause_end();
    }
    return true;
  } else {
    return false;
  }
}

bool ConcurrentMark::should_yield() {
  return cmThread()->should_yield();
}

bool ConcurrentMark::containing_card_is_marked(void* p) {
  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
}

bool ConcurrentMark::containing_cards_are_marked(void* start,
                                                 void* last) {
  return containing_card_is_marked(start) &&
         containing_card_is_marked(last);
}

#ifndef PRODUCT
// for debugging purposes
void ConcurrentMark::print_finger() {
  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
                         _heap_start, _heap_end, _finger);
  for (int i = 0; i < (int) _max_task_num; ++i) {
    gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  }
  gclog_or_tty->print_cr("");
}
#endif

void CMTask::scan_object(oop obj) {
  assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");

  if (_cm->verbose_high()) {
    gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
                           _task_id, (void*) obj);
  }

  size_t obj_size = obj->size();
  _words_scanned += obj_size;

  obj->oop_iterate(_cm_oop_closure);
  statsOnly( ++_objs_scanned );
  check_limits();
}

// Closure for iteration over bitmaps
class CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  CMBitMap*       _nextMarkBitMap;
  ConcurrentMark* _cm;
  CMTask*         _task;
  // true if we're scanning a heap region claimed by the task (so that
  // we move the finger along), false if we're not, i.e. currently when
  // scanning a heap region popped from the region stack (so that we
  // do not move the task finger along; it'd be a mistake if we did so).
  bool            _scanning_heap_region;

public:
  CMBitMapClosure(CMTask *task,
                  ConcurrentMark* cm,
                  CMBitMap* nextMarkBitMap)
    : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  void set_scanning_heap_region(bool scanning_heap_region) {
    _scanning_heap_region = scanning_heap_region;
  }

  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert( addr < _cm->finger(), "invariant");

    if (_scanning_heap_region) {
      statsOnly( _task->increase_objs_found_on_bitmap() );
      assert(addr >= _task->finger(), "invariant");
      // We move that task's local finger along.
      _task->move_finger_to(addr);
    } else {
      // We move the task's region finger along.
      _task->move_region_finger_to(addr);
    }

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

// Closure for iterating over objects, currently only used for
// processing SATB buffers.
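// (Typical use, as in drainAllSATBBuffers() above and the analogous
// per-task SATB draining: install an instance via the SATB queue set's
// set_closure() and then apply it to the completed buffers.)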
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}

void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
         "claim_region() should have filtered out continues humongous regions");
  assert(!hr->continuesHumongous(),
         "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
                           _task_id, hr);
  }

  _curr_region = hr;
  _finger      = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _task_id, bottom, limit);
    }
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and in fact we do not need to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
  }
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
3484 _curr_region = NULL; 3485 _finger = NULL; 3486 _region_limit = NULL; 3487 3488 _region_finger = NULL; 3489 } 3490 3491 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3492 if (cm_oop_closure == NULL) { 3493 assert(_cm_oop_closure != NULL, "invariant"); 3494 } else { 3495 assert(_cm_oop_closure == NULL, "invariant"); 3496 } 3497 _cm_oop_closure = cm_oop_closure; 3498 } 3499 3500 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3501 guarantee(nextMarkBitMap != NULL, "invariant"); 3502 3503 if (_cm->verbose_low()) { 3504 gclog_or_tty->print_cr("[%d] resetting", _task_id); 3505 } 3506 3507 _nextMarkBitMap = nextMarkBitMap; 3508 clear_region_fields(); 3509 assert(_aborted_region.is_empty(), "should have been cleared"); 3510 3511 _calls = 0; 3512 _elapsed_time_ms = 0.0; 3513 _termination_time_ms = 0.0; 3514 _termination_start_time_ms = 0.0; 3515 3516 #if _MARKING_STATS_ 3517 _local_pushes = 0; 3518 _local_pops = 0; 3519 _local_max_size = 0; 3520 _objs_scanned = 0; 3521 _global_pushes = 0; 3522 _global_pops = 0; 3523 _global_max_size = 0; 3524 _global_transfers_to = 0; 3525 _global_transfers_from = 0; 3526 _region_stack_pops = 0; 3527 _regions_claimed = 0; 3528 _objs_found_on_bitmap = 0; 3529 _satb_buffers_processed = 0; 3530 _steal_attempts = 0; 3531 _steals = 0; 3532 _aborted = 0; 3533 _aborted_overflow = 0; 3534 _aborted_cm_aborted = 0; 3535 _aborted_yield = 0; 3536 _aborted_timed_out = 0; 3537 _aborted_satb = 0; 3538 _aborted_termination = 0; 3539 #endif // _MARKING_STATS_ 3540 } 3541 3542 bool CMTask::should_exit_termination() { 3543 regular_clock_call(); 3544 // This is called when we are in the termination protocol. We should 3545 // quit if, for some reason, this task wants to abort or the global 3546 // stack is not empty (this means that we can get work from it). 3547 return !_cm->mark_stack_empty() || has_aborted(); 3548 } 3549 3550 void CMTask::reached_limit() { 3551 assert(_words_scanned >= _words_scanned_limit || 3552 _refs_reached >= _refs_reached_limit , 3553 "shouldn't have been called otherwise"); 3554 regular_clock_call(); 3555 } 3556 3557 void CMTask::regular_clock_call() { 3558 if (has_aborted()) return; 3559 3560 // First, we need to recalculate the words scanned and refs reached 3561 // limits for the next clock call. 3562 recalculate_limits(); 3563 3564 // During the regular clock call we do the following 3565 3566 // (1) If an overflow has been flagged, then we abort. 3567 if (_cm->has_overflown()) { 3568 set_has_aborted(); 3569 return; 3570 } 3571 3572 // If we are not concurrent (i.e. we're doing remark) we don't need 3573 // to check anything else. The other steps are only needed during 3574 // the concurrent marking phase. 3575 if (!concurrent()) return; 3576 3577 // (2) If marking has been aborted for Full GC, then we also abort. 3578 if (_cm->has_aborted()) { 3579 set_has_aborted(); 3580 statsOnly( ++_aborted_cm_aborted ); 3581 return; 3582 } 3583 3584 double curr_time_ms = os::elapsedVTime() * 1000.0; 3585 3586 // (3) If marking stats are enabled, then we update the step history. 
3587 #if _MARKING_STATS_ 3588 if (_words_scanned >= _words_scanned_limit) { 3589 ++_clock_due_to_scanning; 3590 } 3591 if (_refs_reached >= _refs_reached_limit) { 3592 ++_clock_due_to_marking; 3593 } 3594 3595 double last_interval_ms = curr_time_ms - _interval_start_time_ms; 3596 _interval_start_time_ms = curr_time_ms; 3597 _all_clock_intervals_ms.add(last_interval_ms); 3598 3599 if (_cm->verbose_medium()) { 3600 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " 3601 "scanned = %d%s, refs reached = %d%s", 3602 _task_id, last_interval_ms, 3603 _words_scanned, 3604 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 3605 _refs_reached, 3606 (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); 3607 } 3608 #endif // _MARKING_STATS_ 3609 3610 // (4) We check whether we should yield. If we have to, then we abort. 3611 if (_cm->should_yield()) { 3612 // We should yield. To do this we abort the task. The caller is 3613 // responsible for yielding. 3614 set_has_aborted(); 3615 statsOnly( ++_aborted_yield ); 3616 return; 3617 } 3618 3619 // (5) We check whether we've reached our time quota. If we have, 3620 // then we abort. 3621 double elapsed_time_ms = curr_time_ms - _start_time_ms; 3622 if (elapsed_time_ms > _time_target_ms) { 3623 set_has_aborted(); 3624 _has_timed_out = true; 3625 statsOnly( ++_aborted_timed_out ); 3626 return; 3627 } 3628 3629 // (6) Finally, we check whether there are enough completed SATB 3630 // buffers available for processing. If there are, we abort. 3631 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3632 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3633 if (_cm->verbose_low()) { 3634 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", 3635 _task_id); 3636 } 3637 // we do need to process SATB buffers, so we'll abort and restart 3638 // the marking task to do so 3639 set_has_aborted(); 3640 statsOnly( ++_aborted_satb ); 3641 return; 3642 } 3643 } 3644 3645 void CMTask::recalculate_limits() { 3646 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3647 _words_scanned_limit = _real_words_scanned_limit; 3648 3649 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3650 _refs_reached_limit = _real_refs_reached_limit; 3651 } 3652 3653 void CMTask::decrease_limits() { 3654 // This is called when we believe that we're going to do an infrequent 3655 // operation which will increase the per-byte scanned cost (i.e. move 3656 // entries to/from the global stack). It basically tries to decrease the 3657 // scanning limit so that the clock is called earlier.
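// For example, immediately after recalculate_limits() the remaining
// budget is a full period; pulling the limits back by 3/4 of a period
// leaves only about a quarter of words_scanned_period words (or
// refs_reached_period refs) to be processed before
// regular_clock_call() is triggered again.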
3658 3659 if (_cm->verbose_medium()) { 3660 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); 3661 } 3662 3663 _words_scanned_limit = _real_words_scanned_limit - 3664 3 * words_scanned_period / 4; 3665 _refs_reached_limit = _real_refs_reached_limit - 3666 3 * refs_reached_period / 4; 3667 } 3668 3669 void CMTask::move_entries_to_global_stack() { 3670 // local array where we'll store the entries that will be popped 3671 // from the local queue 3672 oop buffer[global_stack_transfer_size]; 3673 3674 int n = 0; 3675 oop obj; 3676 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3677 buffer[n] = obj; 3678 ++n; 3679 } 3680 3681 if (n > 0) { 3682 // we popped at least one entry from the local queue 3683 3684 statsOnly( ++_global_transfers_to; _local_pops += n ); 3685 3686 if (!_cm->mark_stack_push(buffer, n)) { 3687 if (_cm->verbose_low()) { 3688 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", 3689 _task_id); 3690 } 3691 set_has_aborted(); 3692 } else { 3693 // the transfer was successful 3694 3695 if (_cm->verbose_medium()) { 3696 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", 3697 _task_id, n); 3698 } 3699 statsOnly( int tmp_size = _cm->mark_stack_size(); 3700 if (tmp_size > _global_max_size) { 3701 _global_max_size = tmp_size; 3702 } 3703 _global_pushes += n ); 3704 } 3705 } 3706 3707 // this operation was quite expensive, so decrease the limits 3708 decrease_limits(); 3709 } 3710 3711 void CMTask::get_entries_from_global_stack() { 3712 // local array where we'll store the entries that will be popped 3713 // from the global stack. 3714 oop buffer[global_stack_transfer_size]; 3715 int n; 3716 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3717 assert(n <= global_stack_transfer_size, 3718 "we should not pop more than the given limit"); 3719 if (n > 0) { 3720 // yes, we did actually pop at least one entry 3721 3722 statsOnly( ++_global_transfers_from; _global_pops += n ); 3723 if (_cm->verbose_medium()) { 3724 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", 3725 _task_id, n); 3726 } 3727 for (int i = 0; i < n; ++i) { 3728 bool success = _task_queue->push(buffer[i]); 3729 // We only call this when the local queue is empty or under a 3730 // given target limit. So, we do not expect this push to fail. 3731 assert(success, "invariant"); 3732 } 3733 3734 statsOnly( int tmp_size = _task_queue->size(); 3735 if (tmp_size > _local_max_size) { 3736 _local_max_size = tmp_size; 3737 } 3738 _local_pushes += n ); 3739 } 3740 3741 // this operation was quite expensive, so decrease the limits 3742 decrease_limits(); 3743 } 3744 3745 void CMTask::drain_local_queue(bool partially) { 3746 if (has_aborted()) return; 3747 3748 // Decide what the target size is, depending on whether we're going to 3749 // drain it partially (so that other tasks can steal if they run out 3750 // of things to do) or totally (at the very end).
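// A partial drain stops once the queue is down to
// MIN2(max_elems / 3, GCDrainStackTargetSize) entries, so that there
// is still work left for other tasks to steal.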
3751 size_t target_size; 3752 if (partially) { 3753 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3754 } else { 3755 target_size = 0; 3756 } 3757 3758 if (_task_queue->size() > target_size) { 3759 if (_cm->verbose_high()) { 3760 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", 3761 _task_id, target_size); 3762 } 3763 3764 oop obj; 3765 bool ret = _task_queue->pop_local(obj); 3766 while (ret) { 3767 statsOnly( ++_local_pops ); 3768 3769 if (_cm->verbose_high()) { 3770 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, 3771 (void*) obj); 3772 } 3773 3774 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant"); 3775 assert(!_g1h->is_on_master_free_list( 3776 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3777 3778 scan_object(obj); 3779 3780 if (_task_queue->size() <= target_size || has_aborted()) { 3781 ret = false; 3782 } else { 3783 ret = _task_queue->pop_local(obj); 3784 } 3785 } 3786 3787 if (_cm->verbose_high()) { 3788 gclog_or_tty->print_cr("[%d] drained local queue, size = %d", 3789 _task_id, _task_queue->size()); 3790 } 3791 } 3792 } 3793 3794 void CMTask::drain_global_stack(bool partially) { 3795 if (has_aborted()) return; 3796 3797 // We have a policy to drain the local queue before we attempt to 3798 // drain the global stack. 3799 assert(partially || _task_queue->size() == 0, "invariant"); 3800 3801 // Decide what the target size is, depending on whether we're going to 3802 // drain it partially (so that other tasks can steal if they run out 3803 // of things to do) or totally (at the very end). Notice that, 3804 // because we move entries from the global stack in chunks or 3805 // because another task might be doing the same, we might in fact 3806 // drop below the target. But this is not a problem. 3807 size_t target_size; 3808 if (partially) { 3809 target_size = _cm->partial_mark_stack_size_target(); 3810 } else { 3811 target_size = 0; 3812 } 3813 3814 if (_cm->mark_stack_size() > target_size) { 3815 if (_cm->verbose_low()) { 3816 gclog_or_tty->print_cr("[%d] draining global stack, target size = %d", 3817 _task_id, target_size); 3818 } 3819 3820 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3821 get_entries_from_global_stack(); 3822 drain_local_queue(partially); 3823 } 3824 3825 if (_cm->verbose_low()) { 3826 gclog_or_tty->print_cr("[%d] drained global stack, size = %d", 3827 _task_id, _cm->mark_stack_size()); 3828 } 3829 } 3830 } 3831 3832 // SATB Queue has several assumptions on whether to call the par or 3833 // non-par versions of the methods. This is why some of the code is 3834 // replicated. We should really get rid of the single-threaded version 3835 // of the code to simplify things. 3836 void CMTask::drain_satb_buffers() { 3837 if (has_aborted()) return; 3838 3839 // We set this so that the regular clock knows that we're in the 3840 // middle of draining buffers and doesn't set the abort flag when it 3841 // notices that SATB buffers are available for draining. It'd be 3842 // very counterproductive if it did that. :-) 3843 _draining_satb_buffers = true; 3844 3845 CMObjectClosure oc(this); 3846 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3847 if (G1CollectedHeap::use_parallel_gc_threads()) { 3848 satb_mq_set.set_par_closure(_task_id, &oc); 3849 } else { 3850 satb_mq_set.set_closure(&oc); 3851 } 3852 3853 // This keeps claiming and applying the closure to completed buffers 3854 // until we run out of buffers or we need to abort.
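// Note that regular_clock_call() runs after every buffer we process,
// so a long backlog of completed buffers cannot starve the yield and
// abort checks.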
3855 if (G1CollectedHeap::use_parallel_gc_threads()) { 3856 while (!has_aborted() && 3857 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 3858 if (_cm->verbose_medium()) { 3859 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3860 } 3861 statsOnly( ++_satb_buffers_processed ); 3862 regular_clock_call(); 3863 } 3864 } else { 3865 while (!has_aborted() && 3866 satb_mq_set.apply_closure_to_completed_buffer()) { 3867 if (_cm->verbose_medium()) { 3868 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3869 } 3870 statsOnly( ++_satb_buffers_processed ); 3871 regular_clock_call(); 3872 } 3873 } 3874 3875 if (!concurrent() && !has_aborted()) { 3876 // We should only do this during remark. 3877 if (G1CollectedHeap::use_parallel_gc_threads()) { 3878 satb_mq_set.par_iterate_closure_all_threads(_task_id); 3879 } else { 3880 satb_mq_set.iterate_closure_all_threads(); 3881 } 3882 } 3883 3884 _draining_satb_buffers = false; 3885 3886 assert(has_aborted() || 3887 concurrent() || 3888 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3889 3890 if (G1CollectedHeap::use_parallel_gc_threads()) { 3891 satb_mq_set.set_par_closure(_task_id, NULL); 3892 } else { 3893 satb_mq_set.set_closure(NULL); 3894 } 3895 3896 // again, this was a potentially expensive operation, decrease the 3897 // limits to get the regular clock call early 3898 decrease_limits(); 3899 } 3900 3901 void CMTask::drain_region_stack(BitMapClosure* bc) { 3902 if (has_aborted()) return; 3903 3904 assert(_region_finger == NULL, 3905 "it should be NULL when we're not scanning a region"); 3906 3907 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) { 3908 if (_cm->verbose_low()) { 3909 gclog_or_tty->print_cr("[%d] draining region stack, size = %d", 3910 _task_id, _cm->region_stack_size()); 3911 } 3912 3913 MemRegion mr; 3914 3915 if (!_aborted_region.is_empty()) { 3916 mr = _aborted_region; 3917 _aborted_region = MemRegion(); 3918 3919 if (_cm->verbose_low()) { 3920 gclog_or_tty->print_cr("[%d] scanning aborted region " 3921 "[ " PTR_FORMAT ", " PTR_FORMAT " )", 3922 _task_id, mr.start(), mr.end()); 3923 } 3924 } else { 3925 mr = _cm->region_stack_pop_lock_free(); 3926 // it returns MemRegion() if the pop fails 3927 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 3928 } 3929 3930 while (mr.start() != NULL) { 3931 if (_cm->verbose_medium()) { 3932 gclog_or_tty->print_cr("[%d] we are scanning region " 3933 "["PTR_FORMAT", "PTR_FORMAT")", 3934 _task_id, mr.start(), mr.end()); 3935 } 3936 3937 assert(mr.end() <= _cm->finger(), 3938 "otherwise the region shouldn't be on the stack"); 3939 assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); 3940 if (_nextMarkBitMap->iterate(bc, mr)) { 3941 assert(!has_aborted(), 3942 "cannot abort the task without aborting the bitmap iteration"); 3943 3944 // We finished iterating over the region without aborting. 3945 regular_clock_call(); 3946 if (has_aborted()) { 3947 mr = MemRegion(); 3948 } else { 3949 mr = _cm->region_stack_pop_lock_free(); 3950 // it returns MemRegion() if the pop fails 3951 statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 3952 } 3953 } else { 3954 assert(has_aborted(), "currently the only way to do so"); 3955 3956 // The only way to abort the bitmap iteration is to return 3957 // false from the do_bit() method. However, inside the 3958 // do_bit() method we move the _region_finger to point to the 3959 // object currently being looked at. 
So, if we bail out, we 3960 // have definitely set _region_finger to something non-null. 3961 assert(_region_finger != NULL, "invariant"); 3962 3963 // Make sure that any previously aborted region has been 3964 // cleared. 3965 assert(_aborted_region.is_empty(), "aborted region not cleared"); 3966 3967 // The iteration was actually aborted. So now _region_finger 3968 // points to the address of the object we last scanned. If we 3969 // leave it there, when we restart this task, we will rescan 3970 // the object. It is easy to avoid this. We move the finger by 3971 // enough to point to the next possible object header (the 3972 // bitmap knows by how much we need to move it as it knows its 3973 // granularity). 3974 MemRegion newRegion = 3975 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end()); 3976 3977 if (!newRegion.is_empty()) { 3978 if (_cm->verbose_low()) { 3979 gclog_or_tty->print_cr("[%d] recording unscanned region " 3980 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask", 3981 _task_id, 3982 newRegion.start(), newRegion.end()); 3983 } 3984 // Now record the part of the region we didn't scan to 3985 // make sure this task scans it later. 3986 _aborted_region = newRegion; 3987 } 3988 // break from while 3989 mr = MemRegion(); 3990 } 3991 _region_finger = NULL; 3992 } 3993 3994 if (_cm->verbose_low()) { 3995 gclog_or_tty->print_cr("[%d] drained region stack, size = %d", 3996 _task_id, _cm->region_stack_size()); 3997 } 3998 } 3999 } 4000 4001 void CMTask::print_stats() { 4002 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 4003 _task_id, _calls); 4004 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 4005 _elapsed_time_ms, _termination_time_ms); 4006 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4007 _step_times_ms.num(), _step_times_ms.avg(), 4008 _step_times_ms.sd()); 4009 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4010 _step_times_ms.maximum(), _step_times_ms.sum()); 4011 4012 #if _MARKING_STATS_ 4013 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4014 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 4015 _all_clock_intervals_ms.sd()); 4016 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4017 _all_clock_intervals_ms.maximum(), 4018 _all_clock_intervals_ms.sum()); 4019 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 4020 _clock_due_to_scanning, _clock_due_to_marking); 4021 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 4022 _objs_scanned, _objs_found_on_bitmap); 4023 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 4024 _local_pushes, _local_pops, _local_max_size); 4025 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 4026 _global_pushes, _global_pops, _global_max_size); 4027 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 4028 _global_transfers_to, _global_transfers_from); 4029 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d", 4030 _regions_claimed, _region_stack_pops); 4031 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 4032 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 4033 _steal_attempts, _steals); 4034 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 4035 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 4036 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 4037
gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 4038 _aborted_timed_out, _aborted_satb, _aborted_termination); 4039 #endif // _MARKING_STATS_ 4040 } 4041 4042 /***************************************************************************** 4043 4044 The do_marking_step(time_target_ms) method is the building block 4045 of the parallel marking framework. It can be called in parallel 4046 with other invocations of do_marking_step() on different tasks 4047 (but only one per task, obviously) and concurrently with the 4048 mutator threads, or during remark, hence it eliminates the need 4049 for two versions of the code. When called during remark, it will 4050 pick up from where the task left off during the concurrent marking 4051 phase. Interestingly, tasks are also claimable during evacuation 4052 pauses too, since do_marking_step() ensures that it aborts before 4053 it needs to yield. 4054 4055 The data structures that is uses to do marking work are the 4056 following: 4057 4058 (1) Marking Bitmap. If there are gray objects that appear only 4059 on the bitmap (this happens either when dealing with an overflow 4060 or when the initial marking phase has simply marked the roots 4061 and didn't push them on the stack), then tasks claim heap 4062 regions whose bitmap they then scan to find gray objects. A 4063 global finger indicates where the end of the last claimed region 4064 is. A local finger indicates how far into the region a task has 4065 scanned. The two fingers are used to determine how to gray an 4066 object (i.e. whether simply marking it is OK, as it will be 4067 visited by a task in the future, or whether it needs to be also 4068 pushed on a stack). 4069 4070 (2) Local Queue. The local queue of the task which is accessed 4071 reasonably efficiently by the task. Other tasks can steal from 4072 it when they run out of work. Throughout the marking phase, a 4073 task attempts to keep its local queue short but not totally 4074 empty, so that entries are available for stealing by other 4075 tasks. Only when there is no more work, a task will totally 4076 drain its local queue. 4077 4078 (3) Global Mark Stack. This handles local queue overflow. During 4079 marking only sets of entries are moved between it and the local 4080 queues, as access to it requires a mutex and more fine-grain 4081 interaction with it which might cause contention. If it 4082 overflows, then the marking phase should restart and iterate 4083 over the bitmap to identify gray objects. Throughout the marking 4084 phase, tasks attempt to keep the global mark stack at a small 4085 length but not totally empty, so that entries are available for 4086 popping by other tasks. Only when there is no more work, tasks 4087 will totally drain the global mark stack. 4088 4089 (4) Global Region Stack. Entries on it correspond to areas of 4090 the bitmap that need to be scanned since they contain gray 4091 objects. Pushes on the region stack only happen during 4092 evacuation pauses and typically correspond to areas covered by 4093 GC LABS. If it overflows, then the marking phase should restart 4094 and iterate over the bitmap to identify gray objects. Tasks will 4095 try to totally drain the region stack as soon as possible. 4096 4097 (5) SATB Buffer Queue. This is where completed SATB buffers are 4098 made available. Buffers are regularly removed from this queue 4099 and scanned for roots, so that the queue doesn't get too 4100 long. 
During remark, all completed buffers are processed, as 4101 well as the filled-in parts of any uncompleted buffers. 4102 4103 The do_marking_step() method tries to abort when the time target 4104 has been reached. There are a few other cases when the 4105 do_marking_step() method also aborts: 4106 4107 (1) When the marking phase has been aborted (after a Full GC). 4108 4109 (2) When a global overflow (either on the global stack or the 4110 region stack) has been triggered. Before the task aborts, it 4111 will actually sync up with the other tasks to ensure that all 4112 the marking data structures (local queues, stacks, fingers etc.) 4113 are re-initialised so that when do_marking_step() completes, 4114 the marking phase can immediately restart. 4115 4116 (3) When enough completed SATB buffers are available. The 4117 do_marking_step() method only tries to drain SATB buffers right 4118 at the beginning. So, if enough buffers are available, the 4119 marking step aborts and the SATB buffers are processed at 4120 the beginning of the next invocation. 4121 4122 (4) To yield. When we have to yield, we abort and yield 4123 right at the end of do_marking_step(). This saves us from a lot 4124 of hassle as, by yielding, we might allow a Full GC. If this 4125 happens then objects will be compacted underneath our feet, the 4126 heap might shrink, etc. We save checking for this by just 4127 aborting and doing the yield right at the end. 4128 4129 From the above it follows that the do_marking_step() method should 4130 be called in a loop (or, otherwise, regularly) until it completes. 4131 4132 If a marking step completes without its has_aborted() flag being 4133 true, it means it has completed the current marking phase (and 4134 also all other marking tasks have done so and have all synced up). 4135 4136 A method called regular_clock_call() is invoked "regularly" (in 4137 sub-ms intervals) throughout marking. It is this clock method that 4138 checks all the abort conditions which were mentioned above and 4139 decides when the task should abort. A work-based scheme is used to 4140 trigger this clock method: when the number of object words the 4141 marking phase has scanned or the number of references the marking 4142 phase has visited reach a given limit. Additional invocations of 4143 the clock method have been planted in a few other strategic places 4144 too. The initial reason for the clock method was to avoid calling 4145 vtime too regularly, as it is quite expensive. So, once it was in 4146 place, it was natural to piggy-back all the other conditions on it 4147 too and not constantly check them throughout the code.
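    As a rough sketch of the calling pattern that all of the above
    implies (this is not the actual driver code, which lives in the
    concurrent marking and remark tasks; task, cm and target_ms are
    placeholders):

      do {
        task->do_marking_step(target_ms,
                              true,  // do_stealing
                              true); // do_termination
        // on abort: yield, sync up on overflow, or simply retry,
        // as described in the cases above
      } while (task->has_aborted() && !cm->has_aborted());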
4148 4149 *****************************************************************************/ 4150 4151 void CMTask::do_marking_step(double time_target_ms, 4152 bool do_stealing, 4153 bool do_termination) { 4154 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 4155 assert(concurrent() == _cm->concurrent(), "they should be the same"); 4156 4157 assert(concurrent() || _cm->region_stack_empty(), 4158 "the region stack should have been cleared before remark"); 4159 assert(concurrent() || !_cm->has_aborted_regions(), 4160 "aborted regions should have been cleared before remark"); 4161 assert(_region_finger == NULL, 4162 "this should be non-null only when a region is being scanned"); 4163 4164 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 4165 assert(_task_queues != NULL, "invariant"); 4166 assert(_task_queue != NULL, "invariant"); 4167 assert(_task_queues->queue(_task_id) == _task_queue, "invariant"); 4168 4169 assert(!_claimed, 4170 "only one thread should claim this task at any one time"); 4171 4172 // OK, this doesn't safeguard against all possible scenarios, as it is 4173 // possible for two threads to set the _claimed flag at the same 4174 // time. But it is only for debugging purposes anyway and it will 4175 // catch most problems. 4176 _claimed = true; 4177 4178 _start_time_ms = os::elapsedVTime() * 1000.0; 4179 statsOnly( _interval_start_time_ms = _start_time_ms ); 4180 4181 double diff_prediction_ms = 4182 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 4183 _time_target_ms = time_target_ms - diff_prediction_ms; 4184 4185 // set up the variables that are used in the work-based scheme to 4186 // call the regular clock method 4187 _words_scanned = 0; 4188 _refs_reached = 0; 4189 recalculate_limits(); 4190 4191 // clear all flags 4192 clear_has_aborted(); 4193 _has_timed_out = false; 4194 _draining_satb_buffers = false; 4195 4196 ++_calls; 4197 4198 if (_cm->verbose_low()) { 4199 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " 4200 "target = %1.2lfms >>>>>>>>>>", 4201 _task_id, _calls, _time_target_ms); 4202 } 4203 4204 // Set up the bitmap and oop closures. Anything that uses them is 4205 // eventually called from this method, so it is OK to allocate these 4206 // statically. 4207 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 4208 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 4209 set_cm_oop_closure(&cm_oop_closure); 4210 4211 if (_cm->has_overflown()) { 4212 // This can happen if the region stack or the mark stack overflows 4213 // during a GC pause and this task, after a yield point, 4214 // restarts. We have to abort as we need to get into the overflow 4215 // protocol which happens right at the end of this task. 4216 set_has_aborted(); 4217 } 4218 4219 // First drain any available SATB buffers. After this, we will not 4220 // look at SATB buffers before the next invocation of this method. 4221 // If enough completed SATB buffers are queued up, the regular clock 4222 // will abort this task so that it restarts. 4223 drain_satb_buffers(); 4224 // ...then partially drain the local queue and the global stack 4225 drain_local_queue(true); 4226 drain_global_stack(true); 4227 4228 // Then totally drain the region stack. We will not look at 4229 // it again before the next invocation of this method. Entries on 4230 // the region stack are only added during evacuation pauses, for 4231 // which we have to yield. When we do, we abort the task anyway so 4232 // it will look at the region stack again when it restarts.
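// Note that we drain the region stack with _scanning_heap_region set
// to false, so that CMBitMapClosure moves the region finger rather
// than the task's local finger (see CMBitMapClosure::do_bit()).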
4233 bitmap_closure.set_scanning_heap_region(false); 4234 drain_region_stack(&bitmap_closure); 4235 // ...then partially drain the local queue and the global stack 4236 drain_local_queue(true); 4237 drain_global_stack(true); 4238 4239 do { 4240 if (!has_aborted() && _curr_region != NULL) { 4241 // This means that we're already holding on to a region. 4242 assert(_finger != NULL, "if region is not NULL, then the finger " 4243 "should not be NULL either"); 4244 4245 // We might have restarted this task after an evacuation pause 4246 // which might have evacuated the region we're holding on to 4247 // underneath our feet. Let's read its limit again to make sure 4248 // that we do not iterate over a region of the heap that 4249 // contains garbage (update_region_limit() will also move 4250 // _finger to the start of the region if it is found empty). 4251 update_region_limit(); 4252 // We will start from _finger not from the start of the region, 4253 // as we might be restarting this task after aborting half-way 4254 // through scanning this region. In this case, _finger points to 4255 // the address where we last found a marked object. If this is a 4256 // fresh region, _finger points to start(). 4257 MemRegion mr = MemRegion(_finger, _region_limit); 4258 4259 if (_cm->verbose_low()) { 4260 gclog_or_tty->print_cr("[%d] we're scanning part " 4261 "["PTR_FORMAT", "PTR_FORMAT") " 4262 "of region "PTR_FORMAT, 4263 _task_id, _finger, _region_limit, _curr_region); 4264 } 4265 4266 // Let's iterate over the bitmap of the part of the 4267 // region that is left. 4268 bitmap_closure.set_scanning_heap_region(true); 4269 if (mr.is_empty() || 4270 _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4271 // We successfully completed iterating over the region. Now, 4272 // let's give up the region. 4273 giveup_current_region(); 4274 regular_clock_call(); 4275 } else { 4276 assert(has_aborted(), "currently the only way to do so"); 4277 // The only way to abort the bitmap iteration is to return 4278 // false from the do_bit() method. However, inside the 4279 // do_bit() method we move the _finger to point to the 4280 // object currently being looked at. So, if we bail out, we 4281 // have definitely set _finger to something non-null. 4282 assert(_finger != NULL, "invariant"); 4283 4284 // Region iteration was actually aborted. So now _finger 4285 // points to the address of the object we last scanned. If we 4286 // leave it there, when we restart this task, we will rescan 4287 // the object. It is easy to avoid this. We move the finger by 4288 // enough to point to the next possible object header (the 4289 // bitmap knows by how much we need to move it as it knows its 4290 // granularity). 4291 assert(_finger < _region_limit, "invariant"); 4292 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 4293 // Check if bitmap iteration was aborted while scanning the last object 4294 if (new_finger >= _region_limit) { 4295 giveup_current_region(); 4296 } else { 4297 move_finger_to(new_finger); 4298 } 4299 } 4300 } 4301 // At this point we have either completed iterating over the 4302 // region we were holding on to, or we have aborted. 4303 4304 // We then partially drain the local queue and the global stack. 4305 // (Do we really need this?) 
4306 drain_local_queue(true); 4307 drain_global_stack(true); 4308 4309 // Read the note on the claim_region() method on why it might 4310 // return NULL with potentially more regions available for 4311 // claiming and why we have to check out_of_regions() to determine 4312 // whether we're done or not. 4313 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4314 // We are going to try to claim a new region. We should have 4315 // given up on the previous one. 4316 // Separated the asserts so that we know which one fires. 4317 assert(_curr_region == NULL, "invariant"); 4318 assert(_finger == NULL, "invariant"); 4319 assert(_region_limit == NULL, "invariant"); 4320 if (_cm->verbose_low()) { 4321 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 4322 } 4323 HeapRegion* claimed_region = _cm->claim_region(_task_id); 4324 if (claimed_region != NULL) { 4325 // Yes, we managed to claim one 4326 statsOnly( ++_regions_claimed ); 4327 4328 if (_cm->verbose_low()) { 4329 gclog_or_tty->print_cr("[%d] we successfully claimed " 4330 "region "PTR_FORMAT, 4331 _task_id, claimed_region); 4332 } 4333 4334 setup_for_region(claimed_region); 4335 assert(_curr_region == claimed_region, "invariant"); 4336 } 4337 // It is important to call the regular clock here. It might take 4338 // a while to claim a region if, for example, we hit a large 4339 // block of empty regions. So we need to call the regular clock 4340 // method once round the loop to make sure it's called 4341 // frequently enough. 4342 regular_clock_call(); 4343 } 4344 4345 if (!has_aborted() && _curr_region == NULL) { 4346 assert(_cm->out_of_regions(), 4347 "at this point we should be out of regions"); 4348 } 4349 } while (_curr_region != NULL && !has_aborted()); 4350 4351 if (!has_aborted()) { 4352 // We cannot check whether the global stack is empty, since other 4353 // tasks might be pushing objects to it concurrently. We also cannot 4354 // check if the region stack is empty because if a thread is aborting 4355 // it can push a partially done region back. 4356 assert(_cm->out_of_regions(), 4357 "at this point we should be out of regions"); 4358 4359 if (_cm->verbose_low()) { 4360 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); 4361 } 4362 4363 // Try to reduce the number of available SATB buffers so that 4364 // remark has less work to do. 4365 drain_satb_buffers(); 4366 } 4367 4368 // Since we've done everything else, we can now totally drain the 4369 // local queue and global stack. 4370 drain_local_queue(false); 4371 drain_global_stack(false); 4372 4373 // Attempt at work stealing from other tasks' queues. 4374 if (do_stealing && !has_aborted()) { 4375 // We have not aborted. This means that we have finished all that 4376 // we could. Let's try to do some stealing... 4377 4378 // We cannot check whether the global stack is empty, since other 4379 // tasks might be pushing objects to it concurrently. We also cannot 4380 // check if the region stack is empty because if a thread is aborting 4381 // it can push a partially done region back.
4382 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4383 "only way to reach here"); 4384 4385 if (_cm->verbose_low()) { 4386 gclog_or_tty->print_cr("[%d] starting to steal", _task_id); 4387 } 4388 4389 while (!has_aborted()) { 4390 oop obj; 4391 statsOnly( ++_steal_attempts ); 4392 4393 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { 4394 if (_cm->verbose_medium()) { 4395 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", 4396 _task_id, (void*) obj); 4397 } 4398 4399 statsOnly( ++_steals ); 4400 4401 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4402 "any stolen object should be marked"); 4403 scan_object(obj); 4404 4405 // And since we're towards the end, let's totally drain the 4406 // local queue and global stack. 4407 drain_local_queue(false); 4408 drain_global_stack(false); 4409 } else { 4410 break; 4411 } 4412 } 4413 } 4414 4415 // If we are about to wrap up and go into termination, check if we 4416 // should raise the overflow flag. 4417 if (do_termination && !has_aborted()) { 4418 if (_cm->force_overflow()->should_force()) { 4419 _cm->set_has_overflown(); 4420 regular_clock_call(); 4421 } 4422 } 4423 4424 // We still haven't aborted. Now, let's try to get into the 4425 // termination protocol. 4426 if (do_termination && !has_aborted()) { 4427 // We cannot check whether the global stack is empty, since other 4428 // tasks might be concurrently pushing objects on it. We also cannot 4429 // check if the region stack is empty because if a thread is aborting 4430 // it can push a partially done region back. 4431 // Separated the asserts so that we know which one fires. 4432 assert(_cm->out_of_regions(), "only way to reach here"); 4433 assert(_task_queue->size() == 0, "only way to reach here"); 4434 4435 if (_cm->verbose_low()) { 4436 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); 4437 } 4438 4439 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4440 // The CMTask class also extends the TerminatorTerminator class, 4441 // hence its should_exit_termination() method will also decide 4442 // whether to exit the termination protocol or not. 4443 bool finished = _cm->terminator()->offer_termination(this); 4444 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4445 _termination_time_ms += 4446 termination_end_time_ms - _termination_start_time_ms; 4447 4448 if (finished) { 4449 // We're all done. 4450 4451 if (_task_id == 0) { 4452 // let's allow task 0 to do this 4453 if (concurrent()) { 4454 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4455 // we need to set this to false before the next 4456 // safepoint. This way we ensure that the marking phase 4457 // doesn't observe any more heap expansions. 4458 _cm->clear_concurrent_marking_in_progress(); 4459 } 4460 } 4461 4462 // We can now guarantee that the global stack is empty, since 4463 // all other tasks have finished. We separated the guarantees so 4464 // that, if a condition is false, we can immediately find out 4465 // which one. 
4466 guarantee(_cm->out_of_regions(), "only way to reach here"); 4467 guarantee(_aborted_region.is_empty(), "only way to reach here"); 4468 guarantee(_cm->region_stack_empty(), "only way to reach here"); 4469 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4470 guarantee(_task_queue->size() == 0, "only way to reach here"); 4471 guarantee(!_cm->has_overflown(), "only way to reach here"); 4472 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4473 guarantee(!_cm->region_stack_overflow(), "only way to reach here"); 4474 4475 if (_cm->verbose_low()) { 4476 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); 4477 } 4478 } else { 4479 // Apparently there's more work to do. Let's abort this task. The 4480 // caller will restart it and we can hopefully find more things to do. 4481 4482 if (_cm->verbose_low()) { 4483 gclog_or_tty->print_cr("[%d] apparently there is more work to do", 4484 _task_id); 4485 } 4486 4487 set_has_aborted(); 4488 statsOnly( ++_aborted_termination ); 4489 } 4490 } 4491 4492 // Mainly for debugging purposes to make sure that a pointer to the 4493 // closure which was statically allocated in this frame doesn't 4494 // escape it by accident. 4495 set_cm_oop_closure(NULL); 4496 double end_time_ms = os::elapsedVTime() * 1000.0; 4497 double elapsed_time_ms = end_time_ms - _start_time_ms; 4498 // Update the step history. 4499 _step_times_ms.add(elapsed_time_ms); 4500 4501 if (has_aborted()) { 4502 // The task was aborted for some reason. 4503 4504 statsOnly( ++_aborted ); 4505 4506 if (_has_timed_out) { 4507 double diff_ms = elapsed_time_ms - _time_target_ms; 4508 // Keep statistics of how well we did with respect to hitting 4509 // our target only if we actually timed out (if we aborted for 4510 // other reasons, then the results might get skewed). 4511 _marking_step_diffs_ms.add(diff_ms); 4512 } 4513 4514 if (_cm->has_overflown()) { 4515 // This is the interesting one. We aborted because a global 4516 // overflow was raised. This means we have to restart the 4517 // marking phase and start iterating over regions. However, in 4518 // order to do this we have to make sure that all tasks stop 4519 // what they are doing and re-initialise in a safe manner. We 4520 // will achieve this with the use of two barrier sync points. 4521 4522 if (_cm->verbose_low()) { 4523 gclog_or_tty->print_cr("[%d] detected overflow", _task_id); 4524 } 4525 4526 _cm->enter_first_sync_barrier(_task_id); 4527 // When we exit this sync barrier we know that all tasks have 4528 // stopped doing marking work. So, it's now safe to 4529 // re-initialise our data structures. At the end of this method, 4530 // task 0 will clear the global data structures. 4531 4532 statsOnly( ++_aborted_overflow ); 4533 4534 // We clear the local state of this task... 4535 clear_region_fields(); 4536 4537 // ...and enter the second barrier. 4538 _cm->enter_second_sync_barrier(_task_id); 4539 // At this point everything has been re-initialised and we're 4540 // ready to restart.
4541 } 4542 4543 if (_cm->verbose_low()) { 4544 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4545 "elapsed = %1.2lfms <<<<<<<<<<", 4546 _task_id, _time_target_ms, elapsed_time_ms); 4547 if (_cm->has_aborted()) { 4548 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", 4549 _task_id); 4550 } 4551 } 4552 } else { 4553 if (_cm->verbose_low()) { 4554 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4555 "elapsed = %1.2lfms <<<<<<<<<<", 4556 _task_id, _time_target_ms, elapsed_time_ms); 4557 } 4558 } 4559 4560 _claimed = false; 4561 } 4562 4563 CMTask::CMTask(int task_id, 4564 ConcurrentMark* cm, 4565 CMTaskQueue* task_queue, 4566 CMTaskQueueSet* task_queues) 4567 : _g1h(G1CollectedHeap::heap()), 4568 _task_id(task_id), _cm(cm), 4569 _claimed(false), 4570 _nextMarkBitMap(NULL), _hash_seed(17), 4571 _task_queue(task_queue), 4572 _task_queues(task_queues), 4573 _cm_oop_closure(NULL), 4574 _aborted_region(MemRegion()) { 4575 guarantee(task_queue != NULL, "invariant"); 4576 guarantee(task_queues != NULL, "invariant"); 4577 4578 statsOnly( _clock_due_to_scanning = 0; 4579 _clock_due_to_marking = 0 ); 4580 4581 _marking_step_diffs_ms.add(0.5); 4582 } 4583 4584 // These are formatting macros that are used below to ensure 4585 // consistent formatting. The *_H_* versions are used to format the 4586 // header for a particular value and they should be kept consistent 4587 // with the corresponding macro. Also note that most of the macros add 4588 // the necessary white space (as a prefix) which makes them a bit 4589 // easier to compose. 4590 4591 // All the output lines are prefixed with this string to be able to 4592 // identify them easily in a large log file. 4593 #define G1PPRL_LINE_PREFIX "###" 4594 4595 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4596 #ifdef _LP64 4597 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4598 #else // _LP64 4599 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4600 #endif // _LP64 4601 4602 // For per-region info 4603 #define G1PPRL_TYPE_FORMAT " %-4s" 4604 #define G1PPRL_TYPE_H_FORMAT " %4s" 4605 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4606 #define G1PPRL_BYTE_H_FORMAT " %9s" 4607 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4608 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4609 4610 // For summary info 4611 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4612 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4613 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4614 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4615 4616 G1PrintRegionLivenessInfoClosure:: 4617 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4618 : _out(out), 4619 _total_used_bytes(0), _total_capacity_bytes(0), 4620 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4621 _hum_used_bytes(0), _hum_capacity_bytes(0), 4622 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) { 4623 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4624 MemRegion g1_committed = g1h->g1_committed(); 4625 MemRegion g1_reserved = g1h->g1_reserved(); 4626 double now = os::elapsedTime(); 4627 4628 // Print the header of the output. 
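// Schematically, the header produced below looks like:
//   ### PHASE <name> @ <time>
//   ### HEAP committed: <from>-<to> reserved: <from>-<to> region-size: <bytes>
//   ###
//   ### type address-range used prev-live next-live gc-eff
// followed (in doHeapRegion()) by one ### line per region and, in the
// destructor, by a ### SUMMARY footer. Column widths are elided here.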
4629 _out->cr(); 4630 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 4631 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP" 4632 G1PPRL_SUM_ADDR_FORMAT("committed") 4633 G1PPRL_SUM_ADDR_FORMAT("reserved") 4634 G1PPRL_SUM_BYTE_FORMAT("region-size"), 4635 g1_committed.start(), g1_committed.end(), 4636 g1_reserved.start(), g1_reserved.end(), 4637 HeapRegion::GrainBytes); 4638 _out->print_cr(G1PPRL_LINE_PREFIX); 4639 _out->print_cr(G1PPRL_LINE_PREFIX 4640 G1PPRL_TYPE_H_FORMAT 4641 G1PPRL_ADDR_BASE_H_FORMAT 4642 G1PPRL_BYTE_H_FORMAT 4643 G1PPRL_BYTE_H_FORMAT 4644 G1PPRL_BYTE_H_FORMAT 4645 G1PPRL_DOUBLE_H_FORMAT, 4646 "type", "address-range", 4647 "used", "prev-live", "next-live", "gc-eff"); 4648 } 4649 4650 // It takes as a parameter a reference to one of the _hum_* fields, 4651 // deduces the corresponding value for a region in a humongous region 4652 // series (either the region size, or what's left if the _hum_* field 4653 // is < the region size), and updates the _hum_* field accordingly. 4654 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) { 4655 size_t bytes = 0; 4656 // The > 0 check is to deal with the prev and next live bytes which 4657 // could be 0. 4658 if (*hum_bytes > 0) { 4659 bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes); 4660 *hum_bytes -= bytes; 4661 } 4662 return bytes; 4663 } 4664 4665 // It deduces the values for a region in a humongous region series 4666 // from the _hum_* fields and updates those accordingly. It assumes 4667 // that the _hum_* fields have already been set up from the "starts 4668 // humongous" region and we visit the regions in address order. 4669 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4670 size_t* capacity_bytes, 4671 size_t* prev_live_bytes, 4672 size_t* next_live_bytes) { 4673 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4674 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4675 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4676 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4677 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4678 } 4679 4680 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4681 const char* type = ""; 4682 HeapWord* bottom = r->bottom(); 4683 HeapWord* end = r->end(); 4684 size_t capacity_bytes = r->capacity(); 4685 size_t used_bytes = r->used(); 4686 size_t prev_live_bytes = r->live_bytes(); 4687 size_t next_live_bytes = r->next_live_bytes(); 4688 double gc_eff = r->gc_efficiency(); 4689 if (r->used() == 0) { 4690 type = "FREE"; 4691 } else if (r->is_survivor()) { 4692 type = "SURV"; 4693 } else if (r->is_young()) { 4694 type = "EDEN"; 4695 } else if (r->startsHumongous()) { 4696 type = "HUMS"; 4697 4698 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4699 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4700 "they should have been zeroed after the last time we used them"); 4701 // Set up the _hum_* fields.
4702 _hum_capacity_bytes = capacity_bytes; 4703 _hum_used_bytes = used_bytes; 4704 _hum_prev_live_bytes = prev_live_bytes; 4705 _hum_next_live_bytes = next_live_bytes; 4706 get_hum_bytes(&used_bytes, &capacity_bytes, 4707 &prev_live_bytes, &next_live_bytes); 4708 end = bottom + HeapRegion::GrainWords; 4709 } else if (r->continuesHumongous()) { 4710 type = "HUMC"; 4711 get_hum_bytes(&used_bytes, &capacity_bytes, 4712 &prev_live_bytes, &next_live_bytes); 4713 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4714 } else { 4715 type = "OLD"; 4716 } 4717 4718 _total_used_bytes += used_bytes; 4719 _total_capacity_bytes += capacity_bytes; 4720 _total_prev_live_bytes += prev_live_bytes; 4721 _total_next_live_bytes += next_live_bytes; 4722 4723 // Print a line for this particular region. 4724 _out->print_cr(G1PPRL_LINE_PREFIX 4725 G1PPRL_TYPE_FORMAT 4726 G1PPRL_ADDR_BASE_FORMAT 4727 G1PPRL_BYTE_FORMAT 4728 G1PPRL_BYTE_FORMAT 4729 G1PPRL_BYTE_FORMAT 4730 G1PPRL_DOUBLE_FORMAT, 4731 type, bottom, end, 4732 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4733 4734 return false; 4735 } 4736 4737 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4738 // Print the footer of the output. 4739 _out->print_cr(G1PPRL_LINE_PREFIX); 4740 _out->print_cr(G1PPRL_LINE_PREFIX 4741 " SUMMARY" 4742 G1PPRL_SUM_MB_FORMAT("capacity") 4743 G1PPRL_SUM_MB_PERC_FORMAT("used") 4744 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4745 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4746 bytes_to_mb(_total_capacity_bytes), 4747 bytes_to_mb(_total_used_bytes), 4748 perc(_total_used_bytes, _total_capacity_bytes), 4749 bytes_to_mb(_total_prev_live_bytes), 4750 perc(_total_prev_live_bytes, _total_capacity_bytes), 4751 bytes_to_mb(_total_next_live_bytes), 4752 perc(_total_next_live_bytes, _total_capacity_bytes)); 4753 _out->cr(); 4754 }
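// A typical way to use this closure (a sketch: the actual call sites
// live elsewhere in the G1 code and the phase name is illustrative):
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the header, doHeapRegion() prints one line
// per region (splitting humongous series across regions via the
// _hum_* fields), and the destructor prints the summary footer.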