/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
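
// Illustrative note (not part of the original sources): the address to
// bit-index conversions used throughout this file are the inline helpers
// declared in concurrentMark.hpp, which are essentially:
//
//   size_t heapWordToOffset(HeapWord* addr) const {
//     return pointer_delta(addr, _bmStartWord) >> _shifter;
//   }
//   HeapWord* offsetToHeapWord(size_t offset) const {
//     return _bmStartWord + (offset << _shifter);
//   }
//
// i.e. one bit covers 2^_shifter heap words, which with the
// "MinObjAlignment - 1" shifter passed in by ConcurrentMark below
// amounts to one bit per possible object start.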
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
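
#if 0
// Illustrative sketch (not compiled, not from the original sources):
// the general shape of the claim-with-CAS loop used by par_push() and
// par_adjoin_arr() above. Atomic::cmpxchg(new_value, dest, expected)
// returns the value that was at *dest; the update took effect iff that
// value equals 'expected'. 'claim_one_slot' is a hypothetical helper.
static jint claim_one_slot(volatile jint* index_addr, jint capacity) {
  while (true) {
    jint index = *index_addr;
    if (index >= capacity) {
      return -1;                    // full; the caller sets _overflow
    }
    if (Atomic::cmpxchg(index + 1, index_addr, index) == index) {
      return index;                 // we now own slot 'index'
    }
    // Lost the race to another pusher; re-read _index and retry.
  }
}
#endif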
void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}
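
#if 0
// Illustrative sketch (not compiled, not from the original sources):
// the intended pairing of par_pop_arr() / par_push_arr(). A caller
// drains a batch of entries from the global stack into a local buffer
// and processes them; 'process' and the buffer size are hypothetical.
static void drain_some_global_entries(CMMarkStack* global_stack) {
  oop buffer[64];
  int n;
  while (global_stack->par_pop_arr(buffer, 64, &n)) {
    for (int i = 0; i < n; i++) {
      process(buffer[i]);           // hypothetical per-entry work
    }
  }
}
#endif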
CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(MemRegion, _base);
  }
}

void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in the G1 heap.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t) 1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set
      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                                (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
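
    // Illustrative worked example (hypothetical flag values, not
    // defaults): with G1MarkingOverheadPercent=10, MaxGCPauseMillis=200,
    // GCPauseIntervalMillis=1000 and 8 processors, the code above gives
    //   overall_cm_overhead   = 200 * 0.10 / 1000   = 0.02
    //   cpu_ratio             = 1 / 8               = 0.125
    //   marking_thread_num    = ceil(0.02 / 0.125)  = 1
    //   marking_task_overhead = 0.02 / 1 * 8        = 0.16
    //   sleep_factor          = (1 - 0.16) / 0.16   = 5.25
    // i.e. a single marking thread that sleeps about 5.25x as long as
    // it last ran, keeping its CPU usage near the 16% target.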
#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                   (int) _parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we avoid
  // this problem.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when the tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all threads to
  // have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);
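
  // Background note (not part of the original sources): from this point
  // on, the SATB (snapshot-at-the-beginning) write barrier is active:
  // before a reference field is overwritten, its previous value is
  // logged into the mutator's SATB queue, so every object that was
  // reachable when marking started is eventually traced. The buffers
  // are drained by the marking tasks and, finally, at remark.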
  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark  hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,    // activate StrongRootsScope
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,     // do not visit active blobs
                            &older    // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures.
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag, since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative, but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr)) {
    _nextMarkBitMap->parMark(addr);
  }
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects) {
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;
  }

  HeapWord* finger = _finger;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);
  }

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low()) {
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());
    }

    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low()) {
        gclog_or_tty->print_cr("[global] region stack has overflown.");
      }
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects) {
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;
      }

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low()) {
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
          }
        }
      }
    }
  }
}
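
// Background note (not part of the original sources): the finger checks
// above encode the standard concurrent-marking invariant: the global
// finger only moves forward, and everything at or above it will still
// be scanned when the finger sweeps over it. Hence only objects (and
// regions) *below* the finger need to be pushed on the global stacks
// to guarantee they are eventually visited.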
gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", 1165 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); 1166 #endif 1167 } while (!_cm->has_aborted() && the_task->has_aborted()); 1168 } 1169 the_task->record_end_time(); 1170 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 1171 1172 ConcurrentGCThread::stsLeave(); 1173 1174 double end_vtime = os::elapsedVTime(); 1175 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime); 1176 } 1177 1178 CMConcurrentMarkingTask(ConcurrentMark* cm, 1179 ConcurrentMarkThread* cmt) : 1180 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 1181 1182 ~CMConcurrentMarkingTask() { } 1183 }; 1184 1185 void ConcurrentMark::markFromRoots() { 1186 // we might be tempted to assert that: 1187 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1188 // "inconsistent argument?"); 1189 // However that wouldn't be right, because it's possible that 1190 // a safepoint is indeed in progress as a younger generation 1191 // stop-the-world GC happens even as we mark in this generation. 1192 1193 _restart_for_overflow = false; 1194 1195 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads()); 1196 force_overflow_conc()->init(); 1197 set_phase(active_workers, true /* concurrent */); 1198 1199 CMConcurrentMarkingTask markingTask(this, cmThread()); 1200 if (parallel_marking_threads() > 0) { 1201 _parallel_workers->run_task(&markingTask); 1202 } else { 1203 markingTask.work(0); 1204 } 1205 print_stats(); 1206 } 1207 1208 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1209 // world is stopped at this checkpoint 1210 assert(SafepointSynchronize::is_at_safepoint(), 1211 "world should be stopped"); 1212 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1213 1214 // If a full collection has happened, we shouldn't do this. 1215 if (has_aborted()) { 1216 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1217 return; 1218 } 1219 1220 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1221 1222 if (VerifyDuringGC) { 1223 HandleMark hm; // handle scope 1224 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1225 Universe::heap()->prepare_for_verify(); 1226 Universe::verify(/* allow dirty */ true, 1227 /* silent */ false, 1228 /* option */ VerifyOption_G1UsePrevMarking); 1229 } 1230 1231 G1CollectorPolicy* g1p = g1h->g1_policy(); 1232 g1p->record_concurrent_mark_remark_start(); 1233 1234 double start = os::elapsedTime(); 1235 1236 checkpointRootsFinalWork(); 1237 1238 double mark_work_end = os::elapsedTime(); 1239 1240 weakRefsWork(clear_all_soft_refs); 1241 1242 if (has_overflown()) { 1243 // Oops. We overflowed. Restart concurrent marking. 1244 _restart_for_overflow = true; 1245 // Clear the flag. We do not need it any more. 1246 clear_has_overflown(); 1247 if (G1TraceMarkStackOverflow) { 1248 gclog_or_tty->print_cr("\nRemark led to restart for overflow."); 1249 } 1250 } else { 1251 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1252 // We're done with marking. 1253 // This is the end of the marking cycle, we're expected all 1254 // threads to have SATB queues with active set to true. 
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
  force_overflow_conc()->init();
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all threads to
    // have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* allow dirty */ true,
                       /* silent      */ false,
                       /* option      */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm), _final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) break;
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0) {
      _start_vtime_sec = os::elapsedVTime();
    }

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;
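
    // Illustrative worked example (hypothetical addresses, assuming the
    // usual 512-byte cards, i.e. CardTableModRefBS::card_shift == 9):
    // if the reserved heap starts at 0x80000000, _bottom_card_num is
    // 0x400000; an object starting at 0x80001040 has card num 0x400008
    // and therefore card table index 8.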
    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded. It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set. We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1) {
      mark_card_num_range(start_card_num, last_card_num);
    }
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};

void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"), _g1h(g1h),
      _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
    if (ParallelGCThreads > 0) {
      _n_workers = _g1h->workers()->total_workers();
    } else {
      _n_workers = 1;
    }
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task);
  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(int i) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           int worker_num,
                           FreeRegionList* local_cleanup_list,
                           HumongousRegionSet* humongous_proxy_set,
                           HRRSCleanupTask* hrrs_cleanup_task)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
1734 hr->reset_gc_time_stamp(); 1735 if (!hr->continuesHumongous()) { 1736 double start = os::elapsedTime(); 1737 _regions_claimed++; 1738 hr->note_end_of_marking(); 1739 _max_live_bytes += hr->max_live_bytes(); 1740 _g1->free_region_if_empty(hr, 1741 &_freed_bytes, 1742 _local_cleanup_list, 1743 _humongous_proxy_set, 1744 _hrrs_cleanup_task, 1745 true /* par */); 1746 double region_time = (os::elapsedTime() - start); 1747 _claimed_region_time += region_time; 1748 if (region_time > _max_region_time) { 1749 _max_region_time = region_time; 1750 } 1751 } 1752 return false; 1753 } 1754 1755 void ConcurrentMark::cleanup() { 1756 // world is stopped at this checkpoint 1757 assert(SafepointSynchronize::is_at_safepoint(), 1758 "world should be stopped"); 1759 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1760 1761 // If a full collection has happened, we shouldn't do this. 1762 if (has_aborted()) { 1763 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1764 return; 1765 } 1766 1767 g1h->verify_region_sets_optional(); 1768 1769 if (VerifyDuringGC) { 1770 HandleMark hm; // handle scope 1771 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1772 Universe::heap()->prepare_for_verify(); 1773 Universe::verify(/* allow dirty */ true, 1774 /* silent */ false, 1775 /* option */ VerifyOption_G1UsePrevMarking); 1776 } 1777 1778 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1779 g1p->record_concurrent_mark_cleanup_start(); 1780 1781 double start = os::elapsedTime(); 1782 1783 HeapRegionRemSet::reset_for_cleanup_tasks(); 1784 1785 // Do counting once more with the world stopped for good measure. 1786 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), 1787 &_region_bm, &_card_bm); 1788 if (G1CollectedHeap::use_parallel_gc_threads()) { 1789 assert(g1h->check_heap_region_claim_values( 1790 HeapRegion::InitialClaimValue), 1791 "sanity check"); 1792 1793 int n_workers = g1h->workers()->total_workers(); 1794 g1h->set_par_threads(n_workers); 1795 g1h->workers()->run_task(&g1_par_count_task); 1796 g1h->set_par_threads(0); 1797 1798 assert(g1h->check_heap_region_claim_values( 1799 HeapRegion::FinalCountClaimValue), 1800 "sanity check"); 1801 } else { 1802 g1_par_count_task.work(0); 1803 } 1804 1805 size_t known_garbage_bytes = 1806 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); 1807 #if 0 1808 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", 1809 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), 1810 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), 1811 (double) known_garbage_bytes / (double) (1024 * 1024)); 1812 #endif // 0 1813 g1p->set_known_garbage_bytes(known_garbage_bytes); 1814 1815 size_t start_used_bytes = g1h->used(); 1816 _at_least_one_mark_complete = true; 1817 g1h->set_marking_complete(); 1818 1819 double count_end = os::elapsedTime(); 1820 double this_final_counting_time = (count_end - start); 1821 if (G1PrintParCleanupStats) { 1822 gclog_or_tty->print_cr("Cleanup:"); 1823 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", 1824 this_final_counting_time*1000.0); 1825 } 1826 _total_counting_time += this_final_counting_time; 1827 1828 if (G1PrintRegionLivenessInfo) { 1829 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1830 _g1h->heap_region_iterate(&cl); 1831 } 1832 1833 // Install newly created mark bitMap as "prev". 1834 swapMarkBitMaps(); 1835 1836 g1h->reset_gc_time_stamp(); 1837 1838 // Note end of marking in all heap regions. 
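// Both the final-count pass above and the note-end pass below walk the
// heap with heap_region_par_iterate_chunked(), which relies on the
// HeapRegion claim-value protocol: each parallel phase picks a fresh
// claim token, every worker tries to claim each region it visits with a
// single CAS, and check_heap_region_claim_values() then verifies that
// every region was claimed exactly once. Illustrative sketch of the
// claiming step (the helper name and field accessor below are
// assumptions for illustration, not the actual HeapRegion interface):
#if 0
static bool claim_region_for_phase(HeapRegion* r, jint phase_claim_value) {
  // Exactly one worker per phase sees the old (initial) value and wins.
  return Atomic::cmpxchg(phase_claim_value,
                         r->claim_value_addr(),
                         HeapRegion::InitialClaimValue)
         == HeapRegion::InitialClaimValue;
}
#endif // 0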
1839   double note_end_start = os::elapsedTime();
1840   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1841   if (G1CollectedHeap::use_parallel_gc_threads()) {
1842     int n_workers = g1h->workers()->total_workers();
1843     g1h->set_par_threads(n_workers);
1844     g1h->workers()->run_task(&g1_par_note_end_task);
1845     g1h->set_par_threads(0);
1846
1847     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1848            "sanity check");
1849   } else {
1850     g1_par_note_end_task.work(0);
1851   }
1852
1853   if (!cleanup_list_is_empty()) {
1854     // The cleanup list is not empty, so we'll have to process it
1855     // concurrently. Notify anyone else that might be wanting free
1856     // regions that there will be more free regions coming soon.
1857     g1h->set_free_regions_coming();
1858   }
1859   double note_end_end = os::elapsedTime();
1860   if (G1PrintParCleanupStats) {
1861     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1862                            (note_end_end - note_end_start)*1000.0);
1863   }
1864
1865
1866   // If enabled, RS scrubbing has to happen before the cleanup-end
1867   // call below, since it affects the metric by which we sort the heap regions.
1868   if (G1ScrubRemSets) {
1869     double rs_scrub_start = os::elapsedTime();
1870     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1871     if (G1CollectedHeap::use_parallel_gc_threads()) {
1872       int n_workers = g1h->workers()->total_workers();
1873       g1h->set_par_threads(n_workers);
1874       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1875       g1h->set_par_threads(0);
1876
1877       assert(g1h->check_heap_region_claim_values(
1878                                             HeapRegion::ScrubRemSetClaimValue),
1879              "sanity check");
1880     } else {
1881       g1_par_scrub_rs_task.work(0);
1882     }
1883
1884     double rs_scrub_end = os::elapsedTime();
1885     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1886     _total_rs_scrub_time += this_rs_scrub_time;
1887   }
1888
1889   // this will also free any regions totally full of garbage objects,
1890   // and sort the regions.
1891   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1892                         g1_par_note_end_task.freed_bytes(),
1893                         g1_par_note_end_task.max_live_bytes());
1894
1895   // Statistics.
1896   double end = os::elapsedTime();
1897   _cleanup_times.add((end - start) * 1000.0);
1898
1899   // G1CollectedHeap::heap()->print();
1900   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1901   // G1CollectedHeap::heap()->get_gc_time_stamp());
1902
1903   if (PrintGC || PrintGCDetails) {
1904     g1h->print_size_transition(gclog_or_tty,
1905                                start_used_bytes,
1906                                g1h->used(),
1907                                g1h->capacity());
1908   }
1909
1910   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1911   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1912
1913   // We need to make this a "collection" so that any collection pause that
1914   // races with it goes around and waits for completeCleanup() to finish.
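  // Illustrative sketch of the retry this enables (an assumed,
  // simplified shape of the pause-side check, not code from this file):
  // a pause samples the collection counter, and a changed counter tells
  // it that a cleanup "collection" completed underneath it, so it must
  // go around and wait for completeCleanup() before proceeding.
#if 0
  size_t n = g1h->total_collections();
  // ... set up the pause ...
  if (g1h->total_collections() != n) {
    // A concurrent cleanup intervened; retry after completeCleanup().
  }
#endif // 0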
1915   g1h->increment_total_collections();
1916
1917   if (VerifyDuringGC) {
1918     HandleMark hm;  // handle scope
1919     gclog_or_tty->print(" VerifyDuringGC:(after)");
1920     Universe::heap()->prepare_for_verify();
1921     Universe::verify(/* allow dirty */ true,
1922                      /* silent      */ false,
1923                      /* option      */ VerifyOption_G1UsePrevMarking);
1924   }
1925
1926   g1h->verify_region_sets_optional();
1927 }
1928
1929 void ConcurrentMark::completeCleanup() {
1930   if (has_aborted()) return;
1931
1932   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1933
1934   _cleanup_list.verify_optional();
1935   FreeRegionList tmp_free_list("Tmp Free List");
1936
1937   if (G1ConcRegionFreeingVerbose) {
1938     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1939                            "cleanup list has "SIZE_FORMAT" entries",
1940                            _cleanup_list.length());
1941   }
1942
1943   // No one else should be accessing the _cleanup_list at this point,
1944   // so it's not necessary to take any locks
1945   while (!_cleanup_list.is_empty()) {
1946     HeapRegion* hr = _cleanup_list.remove_head();
1947     assert(hr != NULL, "the list was not empty");
1948     hr->par_clear();
1949     tmp_free_list.add_as_tail(hr);
1950
1951     // Instead of adding one region at a time to the secondary_free_list,
1952     // we accumulate them in the local list and move them a few at a
1953     // time. This also cuts down on the number of notify_all() calls
1954     // we do during this process. We'll also append the local list when
1955     // _cleanup_list is empty (which means we just removed the last
1956     // region from the _cleanup_list).
1957     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1958         _cleanup_list.is_empty()) {
1959       if (G1ConcRegionFreeingVerbose) {
1960         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1961                                "appending "SIZE_FORMAT" entries to the "
1962                                "secondary_free_list, clean list still has "
1963                                SIZE_FORMAT" entries",
1964                                tmp_free_list.length(),
1965                                _cleanup_list.length());
1966       }
1967
1968       {
1969         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1970         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1971         SecondaryFreeList_lock->notify_all();
1972       }
1973
1974       if (G1StressConcRegionFreeing) {
1975         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1976           os::sleep(Thread::current(), (jlong) 1, false);
1977         }
1978       }
1979     }
1980   }
1981   assert(tmp_free_list.is_empty(), "post-condition");
1982 }
1983
1984 // Support closures for reference processing in G1
1985
1986 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1987   HeapWord* addr = (HeapWord*)obj;
1988   return addr != NULL &&
1989          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1990 }
1991
1992 class G1CMKeepAliveClosure: public OopClosure {
1993   G1CollectedHeap* _g1;
1994   ConcurrentMark*  _cm;
1995   CMBitMap*        _bitMap;
1996  public:
1997   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1998                        CMBitMap* bitMap) :
1999     _g1(g1), _cm(cm),
2000     _bitMap(bitMap) {}
2001
2002   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2003   virtual void do_oop(      oop* p) { do_oop_work(p); }
2004
2005   template <class T> void do_oop_work(T* p) {
2006     oop obj = oopDesc::load_decode_heap_oop(p);
2007     HeapWord* addr = (HeapWord*)obj;
2008
2009     if (_cm->verbose_high()) {
2010       gclog_or_tty->print_cr("\t[0] we're looking at location "
2011                              "*"PTR_FORMAT" = "PTR_FORMAT,
2012                              p, (void*) obj);
2013     }
2014
2015     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2016       _bitMap->mark(addr);
2017       _cm->mark_stack_push(obj);
2018     }
2019   }
2020 };
2021
2022 class G1CMDrainMarkingStackClosure: public VoidClosure {
2023   CMMarkStack*          _markStack;
2024   CMBitMap*             _bitMap;
2025   G1CMKeepAliveClosure* _oopClosure;
2026  public:
2027   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
2028                                G1CMKeepAliveClosure* oopClosure) :
2029     _bitMap(bitMap),
2030     _markStack(markStack),
2031     _oopClosure(oopClosure)
2032   {}
2033
2034   void do_void() {
2035     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
2036   }
2037 };
2038
2039 // 'Keep Alive' closure used by parallel reference processing.
2040 // An instance of this closure is used in the parallel reference processing
2041 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2042 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
2043 // placed onto the discovered ref lists only once, so we can mark and push
2044 // with no need to check whether the object has already been marked. Using
2045 // the G1CMKeepAliveClosure would mean, however, having all the worker threads
2046 // operating on the global mark stack. This means that an individual
2047 // worker would be doing lock-free pushes while it processes its own
2048 // discovered ref list followed by a drain call. If the discovered ref lists
2049 // are unbalanced then this could cause interference with the other
2050 // workers. Using a CMTask (and its embedded local data structures)
2051 // avoids that potential interference.
2052 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2053   ConcurrentMark*  _cm;
2054   CMTask*          _task;
2055   CMBitMap*        _bitMap;
2056   int              _ref_counter_limit;
2057   int              _ref_counter;
2058  public:
2059   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
2060                                   CMTask* task,
2061                                   CMBitMap* bitMap) :
2062     _cm(cm), _task(task), _bitMap(bitMap),
2063     _ref_counter_limit(G1RefProcDrainInterval)
2064   {
2065     assert(_ref_counter_limit > 0, "sanity");
2066     _ref_counter = _ref_counter_limit;
2067   }
2068
2069   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2070   virtual void do_oop(      oop* p) { do_oop_work(p); }
2071
2072   template <class T> void do_oop_work(T* p) {
2073     if (!_cm->has_overflown()) {
2074       oop obj = oopDesc::load_decode_heap_oop(p);
2075       if (_cm->verbose_high()) {
2076         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2077                                "*"PTR_FORMAT" = "PTR_FORMAT,
2078                                _task->task_id(), p, (void*) obj);
2079       }
2080
2081       _task->deal_with_reference(obj);
2082       _ref_counter--;
2083
2084       if (_ref_counter == 0) {
2085         // We have dealt with _ref_counter_limit references, pushing them and objects
2086         // reachable from them onto the local stack (and possibly the global stack).
2087         // Call do_marking_step() to process these entries. We call the routine in a
2088         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2089         // with the entries that we've pushed as a result of the deal_with_reference
2090         // calls above) or we overflow.
2091         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2092         // while there may still be some work to do. (See the comment at the
2093         // beginning of CMTask::do_marking_step() for those conditions - one of which
2094         // is reaching the specified time target.) It is only when
2095         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2096         // that the marking has completed.
2097         do {
2098           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2099           _task->do_marking_step(mark_step_duration_ms,
2100                                  false /* do_stealing    */,
2101                                  false /* do_termination */);
2102         } while (_task->has_aborted() && !_cm->has_overflown());
2103         _ref_counter = _ref_counter_limit;
2104       }
2105     } else {
2106       if (_cm->verbose_high()) {
2107         gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2108       }
2109     }
2110   }
2111 };
2112
2113 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2114   ConcurrentMark* _cm;
2115   CMTask*         _task;
2116  public:
2117   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2118     _cm(cm), _task(task)
2119   {}
2120
2121   void do_void() {
2122     do {
2123       if (_cm->verbose_high()) {
2124         gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step",
2125                                _task->task_id());
2126       }
2127
2128       // We call CMTask::do_marking_step() to completely drain the local and
2129       // global marking stacks. The routine is called in a loop, which we'll
2130       // exit if there's nothing more to do (i.e. we've completely drained the
2131       // entries that were pushed as a result of applying the
2132       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2133       // lists above) or we overflow the global marking stack.
2134       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2135       // while there may still be some work to do. (See the comment at the
2136       // beginning of CMTask::do_marking_step() for those conditions - one of which
2137       // is reaching the specified time target.) It is only when
2138       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2139       // that the marking has completed.
2140
2141       _task->do_marking_step(1000000000.0 /* something very large */,
2142                              true /* do_stealing    */,
2143                              true /* do_termination */);
2144     } while (_task->has_aborted() && !_cm->has_overflown());
2145   }
2146 };
2147
2148 // Implementation of AbstractRefProcTaskExecutor for G1
2149 class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2150 private:
2151   G1CollectedHeap* _g1h;
2152   ConcurrentMark*  _cm;
2153   CMBitMap*        _bitmap;
2154   WorkGang*        _workers;
2155   int              _active_workers;
2156
2157 public:
2158   G1RefProcTaskExecutor(G1CollectedHeap* g1h,
2159                         ConcurrentMark* cm,
2160                         CMBitMap* bitmap,
2161                         WorkGang* workers,
2162                         int n_workers) :
2163     _g1h(g1h), _cm(cm), _bitmap(bitmap),
2164     _workers(workers), _active_workers(n_workers)
2165   { }
2166
2167   // Executes the given task using concurrent marking worker threads.
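  // (ProcessTask and EnqueueTask are the reference processor's units of
  // work; the G1RefProcTaskProxy and G1RefEnqueueTaskProxy classes below
  // adapt them to AbstractGangTask so the G1 work gang can run them.)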
2168 virtual void execute(ProcessTask& task); 2169 virtual void execute(EnqueueTask& task); 2170 }; 2171 2172 class G1RefProcTaskProxy: public AbstractGangTask { 2173 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2174 ProcessTask& _proc_task; 2175 G1CollectedHeap* _g1h; 2176 ConcurrentMark* _cm; 2177 CMBitMap* _bitmap; 2178 2179 public: 2180 G1RefProcTaskProxy(ProcessTask& proc_task, 2181 G1CollectedHeap* g1h, 2182 ConcurrentMark* cm, 2183 CMBitMap* bitmap) : 2184 AbstractGangTask("Process reference objects in parallel"), 2185 _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap) 2186 {} 2187 2188 virtual void work(int i) { 2189 CMTask* marking_task = _cm->task(i); 2190 G1CMIsAliveClosure g1_is_alive(_g1h); 2191 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap); 2192 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); 2193 2194 _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2195 } 2196 }; 2197 2198 void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) { 2199 assert(_workers != NULL, "Need parallel worker threads."); 2200 2201 G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap); 2202 2203 // We need to reset the phase for each task execution so that 2204 // the termination protocol of CMTask::do_marking_step works. 2205 _cm->set_phase(_active_workers, false /* concurrent */); 2206 _g1h->set_par_threads(_active_workers); 2207 _workers->run_task(&proc_task_proxy); 2208 _g1h->set_par_threads(0); 2209 } 2210 2211 class G1RefEnqueueTaskProxy: public AbstractGangTask { 2212 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2213 EnqueueTask& _enq_task; 2214 2215 public: 2216 G1RefEnqueueTaskProxy(EnqueueTask& enq_task) : 2217 AbstractGangTask("Enqueue reference objects in parallel"), 2218 _enq_task(enq_task) 2219 { } 2220 2221 virtual void work(int i) { 2222 _enq_task.work(i); 2223 } 2224 }; 2225 2226 void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2227 assert(_workers != NULL, "Need parallel worker threads."); 2228 2229 G1RefEnqueueTaskProxy enq_task_proxy(enq_task); 2230 2231 _g1h->set_par_threads(_active_workers); 2232 _workers->run_task(&enq_task_proxy); 2233 _g1h->set_par_threads(0); 2234 } 2235 2236 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2237 ResourceMark rm; 2238 HandleMark hm; 2239 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2240 ReferenceProcessor* rp = g1h->ref_processor(); 2241 2242 // See the comment in G1CollectedHeap::ref_processing_init() 2243 // about how reference processing currently works in G1. 2244 2245 // Process weak references. 2246 rp->setup_policy(clear_all_soft_refs); 2247 assert(_markStack.isEmpty(), "mark stack should be empty"); 2248 2249 G1CMIsAliveClosure g1_is_alive(g1h); 2250 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap()); 2251 G1CMDrainMarkingStackClosure 2252 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive); 2253 // We use the work gang from the G1CollectedHeap and we utilize all 2254 // the worker threads. 2255 int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1; 2256 active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1); 2257 2258 G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(), 2259 g1h->workers(), active_workers); 2260 2261 2262 if (rp->processing_is_mt()) { 2263 // Set the degree of MT here. 
If the discovery is done MT, there
2264     // may have been a different number of threads doing the discovery
2265     // and a different number of discovered lists may contain Ref objects.
2266     // That is OK as long as the Reference lists are balanced (see
2267     // balance_all_queues() and balance_queues()).
2268     rp->set_active_mt_degree(active_workers);
2269
2270     rp->process_discovered_references(&g1_is_alive,
2271                                       &g1_keep_alive,
2272                                       &g1_drain_mark_stack,
2273                                       &par_task_executor);
2274
2275     // The work routines of the parallel keep_alive and drain_marking_stack
2276     // will set the has_overflown flag if we overflow the global marking
2277     // stack.
2278   } else {
2279     rp->process_discovered_references(&g1_is_alive,
2280                                       &g1_keep_alive,
2281                                       &g1_drain_mark_stack,
2282                                       NULL);
2283
2284   }
2285
2286   assert(_markStack.overflow() || _markStack.isEmpty(),
2287          "mark stack should be empty (unless it overflowed)");
2288   if (_markStack.overflow()) {
2289     // Should have been done already when we tried to push an
2290     // entry on to the global mark stack. But let's do it again.
2291     set_has_overflown();
2292   }
2293
2294   if (rp->processing_is_mt()) {
2295     assert(rp->num_q() == active_workers, "queue count should match active workers");
2296     rp->enqueue_discovered_references(&par_task_executor);
2297   } else {
2298     rp->enqueue_discovered_references();
2299   }
2300
2301   rp->verify_no_references_recorded();
2302   assert(!rp->discovery_enabled(), "should have been disabled");
2303
2304   // Now clean up stale oops in StringTable
2305   StringTable::unlink(&g1_is_alive);
2306   // Clean up unreferenced symbols in symbol table.
2307   SymbolTable::unlink();
2308 }
2309
2310 void ConcurrentMark::swapMarkBitMaps() {
2311   CMBitMapRO* temp = _prevMarkBitMap;
2312   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2313   _nextMarkBitMap  = (CMBitMap*)  temp;
2314 }
2315
2316 class CMRemarkTask: public AbstractGangTask {
2317 private:
2318   ConcurrentMark *_cm;
2319
2320 public:
2321   void work(int worker_i) {
2322     // Since all available tasks are actually started, we should
2323     // only proceed if we're supposed to be active.
2324     if ((size_t)worker_i < _cm->active_tasks()) {
2325       CMTask* task = _cm->task(worker_i);
2326       task->record_start_time();
2327       do {
2328         task->do_marking_step(1000000000.0 /* something very large */,
2329                               true /* do_stealing    */,
2330                               true /* do_termination */);
2331       } while (task->has_aborted() && !_cm->has_overflown());
2332       // If we overflow, then we do not want to restart. We instead
2333       // want to abort remark and do concurrent marking again.
2334       task->record_end_time();
2335     }
2336   }
2337
2338   CMRemarkTask(ConcurrentMark* cm) :
2339     AbstractGangTask("Par Remark"), _cm(cm) { }
2340 };
2341
2342 void ConcurrentMark::checkpointRootsFinalWork() {
2343   ResourceMark rm;
2344   HandleMark   hm;
2345   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2346
2347   g1h->ensure_parsability(false);
2348
2349   if (G1CollectedHeap::use_parallel_gc_threads()) {
2350     G1CollectedHeap::StrongRootsScope srs(g1h);
2351     // this is remark, so we'll use up all available threads
2352     int active_workers = ParallelGCThreads;
2353     set_phase(active_workers, false /* concurrent */);
2354
2355     CMRemarkTask remarkTask(this);
2356     // We will start all available threads, even if we decide that the
2357     // active_workers will be fewer. The extra ones will just bail out
2358     // immediately.
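    // (The bail-out is the "(size_t)worker_i < _cm->active_tasks()" check
    // in CMRemarkTask::work() above: set_phase() sized the marking tasks
    // and their termination protocol for active_workers, so the surplus
    // gang threads return without ever touching a task.)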
2359 int n_workers = g1h->workers()->total_workers(); 2360 g1h->set_par_threads(n_workers); 2361 g1h->workers()->run_task(&remarkTask); 2362 g1h->set_par_threads(0); 2363 } else { 2364 G1CollectedHeap::StrongRootsScope srs(g1h); 2365 // this is remark, so we'll use up all available threads 2366 int active_workers = 1; 2367 set_phase(active_workers, false /* concurrent */); 2368 2369 CMRemarkTask remarkTask(this); 2370 // We will start all available threads, even if we decide that the 2371 // active_workers will be fewer. The extra ones will just bail out 2372 // immediately. 2373 remarkTask.work(0); 2374 } 2375 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2376 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2377 2378 print_stats(); 2379 2380 #if VERIFY_OBJS_PROCESSED 2381 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2382 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2383 _scan_obj_cl.objs_processed, 2384 ThreadLocalObjQueue::objs_enqueued); 2385 guarantee(_scan_obj_cl.objs_processed == 2386 ThreadLocalObjQueue::objs_enqueued, 2387 "Different number of objs processed and enqueued."); 2388 } 2389 #endif 2390 } 2391 2392 #ifndef PRODUCT 2393 2394 class PrintReachableOopClosure: public OopClosure { 2395 private: 2396 G1CollectedHeap* _g1h; 2397 outputStream* _out; 2398 VerifyOption _vo; 2399 bool _all; 2400 2401 public: 2402 PrintReachableOopClosure(outputStream* out, 2403 VerifyOption vo, 2404 bool all) : 2405 _g1h(G1CollectedHeap::heap()), 2406 _out(out), _vo(vo), _all(all) { } 2407 2408 void do_oop(narrowOop* p) { do_oop_work(p); } 2409 void do_oop( oop* p) { do_oop_work(p); } 2410 2411 template <class T> void do_oop_work(T* p) { 2412 oop obj = oopDesc::load_decode_heap_oop(p); 2413 const char* str = NULL; 2414 const char* str2 = ""; 2415 2416 if (obj == NULL) { 2417 str = ""; 2418 } else if (!_g1h->is_in_g1_reserved(obj)) { 2419 str = " O"; 2420 } else { 2421 HeapRegion* hr = _g1h->heap_region_containing(obj); 2422 guarantee(hr != NULL, "invariant"); 2423 bool over_tams = false; 2424 bool marked = false; 2425 2426 switch (_vo) { 2427 case VerifyOption_G1UsePrevMarking: 2428 over_tams = hr->obj_allocated_since_prev_marking(obj); 2429 marked = _g1h->isMarkedPrev(obj); 2430 break; 2431 case VerifyOption_G1UseNextMarking: 2432 over_tams = hr->obj_allocated_since_next_marking(obj); 2433 marked = _g1h->isMarkedNext(obj); 2434 break; 2435 case VerifyOption_G1UseMarkWord: 2436 marked = obj->is_gc_marked(); 2437 break; 2438 default: 2439 ShouldNotReachHere(); 2440 } 2441 2442 if (over_tams) { 2443 str = " >"; 2444 if (marked) { 2445 str2 = " AND MARKED"; 2446 } 2447 } else if (marked) { 2448 str = " M"; 2449 } else { 2450 str = " NOT"; 2451 } 2452 } 2453 2454 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2455 p, (void*) obj, str, str2); 2456 } 2457 }; 2458 2459 class PrintReachableObjectClosure : public ObjectClosure { 2460 private: 2461 G1CollectedHeap* _g1h; 2462 outputStream* _out; 2463 VerifyOption _vo; 2464 bool _all; 2465 HeapRegion* _hr; 2466 2467 public: 2468 PrintReachableObjectClosure(outputStream* out, 2469 VerifyOption vo, 2470 bool all, 2471 HeapRegion* hr) : 2472 _g1h(G1CollectedHeap::heap()), 2473 _out(out), _vo(vo), _all(all), _hr(hr) { } 2474 2475 void do_object(oop o) { 2476 bool over_tams = false; 2477 bool marked = false; 2478 2479 switch (_vo) { 2480 case VerifyOption_G1UsePrevMarking: 2481 over_tams = _hr->obj_allocated_since_prev_marking(o); 2482 marked = _g1h->isMarkedPrev(o); 2483 break; 2484 
case VerifyOption_G1UseNextMarking: 2485 over_tams = _hr->obj_allocated_since_next_marking(o); 2486 marked = _g1h->isMarkedNext(o); 2487 break; 2488 case VerifyOption_G1UseMarkWord: 2489 marked = o->is_gc_marked(); 2490 break; 2491 default: 2492 ShouldNotReachHere(); 2493 } 2494 bool print_it = _all || over_tams || marked; 2495 2496 if (print_it) { 2497 _out->print_cr(" "PTR_FORMAT"%s", 2498 o, (over_tams) ? " >" : (marked) ? " M" : ""); 2499 PrintReachableOopClosure oopCl(_out, _vo, _all); 2500 o->oop_iterate(&oopCl); 2501 } 2502 } 2503 }; 2504 2505 class PrintReachableRegionClosure : public HeapRegionClosure { 2506 private: 2507 outputStream* _out; 2508 VerifyOption _vo; 2509 bool _all; 2510 2511 public: 2512 bool doHeapRegion(HeapRegion* hr) { 2513 HeapWord* b = hr->bottom(); 2514 HeapWord* e = hr->end(); 2515 HeapWord* t = hr->top(); 2516 HeapWord* p = NULL; 2517 2518 switch (_vo) { 2519 case VerifyOption_G1UsePrevMarking: 2520 p = hr->prev_top_at_mark_start(); 2521 break; 2522 case VerifyOption_G1UseNextMarking: 2523 p = hr->next_top_at_mark_start(); 2524 break; 2525 case VerifyOption_G1UseMarkWord: 2526 // When we are verifying marking using the mark word 2527 // TAMS has no relevance. 2528 assert(p == NULL, "post-condition"); 2529 break; 2530 default: 2531 ShouldNotReachHere(); 2532 } 2533 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2534 "TAMS: "PTR_FORMAT, b, e, t, p); 2535 _out->cr(); 2536 2537 HeapWord* from = b; 2538 HeapWord* to = t; 2539 2540 if (to > from) { 2541 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2542 _out->cr(); 2543 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2544 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2545 _out->cr(); 2546 } 2547 2548 return false; 2549 } 2550 2551 PrintReachableRegionClosure(outputStream* out, 2552 VerifyOption vo, 2553 bool all) : 2554 _out(out), _vo(vo), _all(all) { } 2555 }; 2556 2557 static const char* verify_option_to_tams(VerifyOption vo) { 2558 switch (vo) { 2559 case VerifyOption_G1UsePrevMarking: 2560 return "PTAMS"; 2561 case VerifyOption_G1UseNextMarking: 2562 return "NTAMS"; 2563 default: 2564 return "NONE"; 2565 } 2566 } 2567 2568 void ConcurrentMark::print_reachable(const char* str, 2569 VerifyOption vo, 2570 bool all) { 2571 gclog_or_tty->cr(); 2572 gclog_or_tty->print_cr("== Doing heap dump... "); 2573 2574 if (G1PrintReachableBaseFile == NULL) { 2575 gclog_or_tty->print_cr(" #### error: no base file defined"); 2576 return; 2577 } 2578 2579 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2580 (JVM_MAXPATHLEN - 1)) { 2581 gclog_or_tty->print_cr(" #### error: file name too long"); 2582 return; 2583 } 2584 2585 char file_name[JVM_MAXPATHLEN]; 2586 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2587 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2588 2589 fileStream fout(file_name); 2590 if (!fout.is_open()) { 2591 gclog_or_tty->print_cr(" #### error: could not open file"); 2592 return; 2593 } 2594 2595 outputStream* out = &fout; 2596 out->print_cr("-- USING %s", verify_option_to_tams(vo)); 2597 out->cr(); 2598 2599 out->print_cr("--- ITERATING OVER REGIONS"); 2600 out->cr(); 2601 PrintReachableRegionClosure rcl(out, vo, all); 2602 _g1h->heap_region_iterate(&rcl); 2603 out->cr(); 2604 2605 gclog_or_tty->print_cr(" done"); 2606 gclog_or_tty->flush(); 2607 } 2608 2609 #endif // PRODUCT 2610 2611 // This note is for drainAllSATBBuffers and the code in between. 
2612 // In the future we could reuse a task to do this work during an
2613 // evacuation pause (since now tasks are not active and can be claimed
2614 // during an evacuation pause). This was a late change to the code and
2615 // is currently not being taken advantage of.
2616
2617 class CMGlobalObjectClosure : public ObjectClosure {
2618 private:
2619   ConcurrentMark* _cm;
2620
2621 public:
2622   void do_object(oop obj) {
2623     _cm->deal_with_reference(obj);
2624   }
2625
2626   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2627 };
2628
2629 void ConcurrentMark::deal_with_reference(oop obj) {
2630   if (verbose_high()) {
2631     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2632                            (void*) obj);
2633   }
2634
2635   HeapWord* objAddr = (HeapWord*) obj;
2636   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2637   if (_g1h->is_in_g1_reserved(objAddr)) {
2638     assert(obj != NULL, "null check is implicit");
2639     if (!_nextMarkBitMap->isMarked(objAddr)) {
2640       // Only get the containing region if the object is not marked on the
2641       // bitmap (otherwise, it's a waste of time since we won't do
2642       // anything with it).
2643       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2644       if (!hr->obj_allocated_since_next_marking(obj)) {
2645         if (verbose_high()) {
2646           gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2647                                  "marked", (void*) obj);
2648         }
2649
2650         // we need to mark it first
2651         if (_nextMarkBitMap->parMark(objAddr)) {
2652           // No OrderAccess::store_load() is needed. It is implicit in the
2653           // CAS done in parMark(objAddr) above
2654           HeapWord* finger = _finger;
2655           if (objAddr < finger) {
2656             if (verbose_high()) {
2657               gclog_or_tty->print_cr("[global] below the global finger "
2658                                      "("PTR_FORMAT"), pushing it", finger);
2659             }
2660             if (!mark_stack_push(obj)) {
2661               if (verbose_low()) {
2662                 gclog_or_tty->print_cr("[global] global stack overflow during "
2663                                        "deal_with_reference");
2664               }
2665             }
2666           }
2667         }
2668       }
2669     }
2670   }
2671 }
2672
2673 void ConcurrentMark::drainAllSATBBuffers() {
2674   CMGlobalObjectClosure oc(this);
2675   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2676   satb_mq_set.set_closure(&oc);
2677
2678   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2679     if (verbose_medium()) {
2680       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2681     }
2682   }
2683
2684   // no need to check whether we should do this, as this is only
2685   // called during an evacuation pause
2686   satb_mq_set.iterate_closure_all_threads();
2687
2688   satb_mq_set.set_closure(NULL);
2689   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2690 }
2691
2692 void ConcurrentMark::markPrev(oop p) {
2693   // Note we are overriding the read-only view of the prev map here, via
2694   // the cast.
2695   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2696 }
2697
2698 void ConcurrentMark::clear(oop p) {
2699   assert(p != NULL && p->is_oop(), "expected an oop");
2700   HeapWord* addr = (HeapWord*)p;
2701   assert(addr >= _nextMarkBitMap->startWord() &&
2702          addr < _nextMarkBitMap->endWord(), "in a region");
2703
2704   _nextMarkBitMap->clear(addr);
2705 }
2706
2707 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2708   // Note we are overriding the read-only view of the prev map here, via
2709   // the cast.
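  // (The prev bitmap is deliberately typed CMBitMapRO so that the rest
  // of the code cannot write to it by accident; the handful of
  // sanctioned writers, like this method and markPrev() above, make the
  // override explicit with a cast.)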
2710   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2711   _nextMarkBitMap->clearRange(mr);
2712 }
2713
2714 HeapRegion*
2715 ConcurrentMark::claim_region(int task_num) {
2716   // "checkpoint" the finger
2717   HeapWord* finger = _finger;
2718
2719   // _heap_end will not change underneath our feet; it only changes at
2720   // yield points.
2721   while (finger < _heap_end) {
2722     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2723
2724     // Note on how this code handles humongous regions. In the
2725     // normal case the finger will reach the start of a "starts
2726     // humongous" (SH) region. Its end will either be the end of the
2727     // last "continues humongous" (CH) region in the sequence, or the
2728     // standard end of the SH region (if the SH is the only region in
2729     // the sequence). That way claim_region() will skip over the CH
2730     // regions. However, there is a subtle race between a CM thread
2731     // executing this method and a mutator thread doing a humongous
2732     // object allocation. The two are not mutually exclusive as the CM
2733     // thread does not need to hold the Heap_lock when it gets
2734     // here. So there is a chance that claim_region() will come across
2735     // a free region that's in the process of becoming a SH or a CH
2736     // region. In the former case, it will either
2737     //   a) miss the update to the region's end, in which case it will
2738     //      visit every subsequent CH region, will find their bitmaps
2739     //      empty, and do nothing, or
2740     //   b) observe the update of the region's end (in which case
2741     //      it will skip the subsequent CH regions).
2742     // If it comes across a region that suddenly becomes CH, the
2743     // scenario will be similar to b). So, the race between
2744     // claim_region() and a humongous object allocation might force us
2745     // to do a bit of unnecessary work (due to some unnecessary bitmap
2746     // iterations) but it should not introduce any correctness issues.
2747     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2748     HeapWord*   bottom        = curr_region->bottom();
2749     HeapWord*   end           = curr_region->end();
2750     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2751
2752     if (verbose_low()) {
2753       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2754                              "["PTR_FORMAT", "PTR_FORMAT"), "
2755                              "limit = "PTR_FORMAT,
2756                              task_num, curr_region, bottom, end, limit);
2757     }
2758
2759     // Is the gap between reading the finger and doing the CAS too long?
2760     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2761     if (res == finger) {
2762       // we succeeded
2763
2764       // notice that _finger == end cannot be guaranteed here since
2765       // someone else might have moved the finger even further
2766       assert(_finger >= end, "the finger should have moved forward");
2767
2768       if (verbose_low()) {
2769         gclog_or_tty->print_cr("[%d] we were successful with region = "
2770                                PTR_FORMAT, task_num, curr_region);
2771       }
2772
2773       if (limit > bottom) {
2774         if (verbose_low()) {
2775           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2776                                  "returning it ", task_num, curr_region);
2777         }
2778         return curr_region;
2779       } else {
2780         assert(limit == bottom,
2781                "the region limit should be at bottom");
2782         if (verbose_low()) {
2783           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2784                                  "returning NULL", task_num, curr_region);
2785         }
2786         // we return NULL and the caller should try calling
2787         // claim_region() again.
2788         return NULL;
2789       }
2790     } else {
2791       assert(_finger > finger, "the finger should have moved forward");
2792       if (verbose_low()) {
2793         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2794                                "global finger = "PTR_FORMAT", "
2795                                "our finger = "PTR_FORMAT,
2796                                task_num, _finger, finger);
2797       }
2798
2799       // read it again
2800       finger = _finger;
2801     }
2802   }
2803
2804   return NULL;
2805 }
2806
2807 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2808   bool result = false;
2809   for (int i = 0; i < (int)_max_task_num; ++i) {
2810     CMTask* the_task = _tasks[i];
2811     MemRegion mr = the_task->aborted_region();
2812     if (mr.start() != NULL) {
2813       assert(mr.end() != NULL, "invariant");
2814       assert(mr.word_size() > 0, "invariant");
2815       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2816       assert(hr != NULL, "invariant");
2817       if (hr->in_collection_set()) {
2818         // The region points into the collection set
2819         the_task->set_aborted_region(MemRegion());
2820         result = true;
2821       }
2822     }
2823   }
2824   return result;
2825 }
2826
2827 bool ConcurrentMark::has_aborted_regions() {
2828   for (int i = 0; i < (int)_max_task_num; ++i) {
2829     CMTask* the_task = _tasks[i];
2830     MemRegion mr = the_task->aborted_region();
2831     if (mr.start() != NULL) {
2832       assert(mr.end() != NULL, "invariant");
2833       assert(mr.word_size() > 0, "invariant");
2834       return true;
2835     }
2836   }
2837   return false;
2838 }
2839
2840 void ConcurrentMark::oops_do(OopClosure* cl) {
2841   if (_markStack.size() > 0 && verbose_low()) {
2842     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2843                            "size = %d", _markStack.size());
2844   }
2845   // we first iterate over the contents of the mark stack...
2846   _markStack.oops_do(cl);
2847
2848   for (int i = 0; i < (int)_max_task_num; ++i) {
2849     OopTaskQueue* queue = _task_queues->queue((int)i);
2850
2851     if (queue->size() > 0 && verbose_low()) {
2852       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2853                              "size = %d", i, queue->size());
2854     }
2855
2856     // ...then over the contents of all the task queues.
2857     queue->oops_do(cl);
2858   }
2859
2860   // Invalidate any entries in the region stack that
2861   // point into the collection set
2862   if (_regionStack.invalidate_entries_into_cset()) {
2863     // otherwise, any gray objects copied during the evacuation pause
2864     // might not be visited.
2865     assert(_should_gray_objects, "invariant");
2866   }
2867
2868   // Invalidate any aborted regions, recorded in the individual CM
2869   // tasks, that point into the collection set.
2870   if (invalidate_aborted_regions_in_cset()) {
2871     // otherwise, any gray objects copied during the evacuation pause
2872     // might not be visited.
2873 assert(_should_gray_objects, "invariant"); 2874 } 2875 2876 } 2877 2878 void ConcurrentMark::clear_marking_state(bool clear_overflow) { 2879 _markStack.setEmpty(); 2880 _markStack.clear_overflow(); 2881 _regionStack.setEmpty(); 2882 _regionStack.clear_overflow(); 2883 if (clear_overflow) { 2884 clear_has_overflown(); 2885 } else { 2886 assert(has_overflown(), "pre-condition"); 2887 } 2888 _finger = _heap_start; 2889 2890 for (int i = 0; i < (int)_max_task_num; ++i) { 2891 OopTaskQueue* queue = _task_queues->queue(i); 2892 queue->set_empty(); 2893 // Clear any partial regions from the CMTasks 2894 _tasks[i]->clear_aborted_region(); 2895 } 2896 } 2897 2898 void ConcurrentMark::print_stats() { 2899 if (verbose_stats()) { 2900 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2901 for (size_t i = 0; i < _active_tasks; ++i) { 2902 _tasks[i]->print_stats(); 2903 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2904 } 2905 } 2906 } 2907 2908 class CSMarkOopClosure: public OopClosure { 2909 friend class CSMarkBitMapClosure; 2910 2911 G1CollectedHeap* _g1h; 2912 CMBitMap* _bm; 2913 ConcurrentMark* _cm; 2914 oop* _ms; 2915 jint* _array_ind_stack; 2916 int _ms_size; 2917 int _ms_ind; 2918 int _array_increment; 2919 2920 bool push(oop obj, int arr_ind = 0) { 2921 if (_ms_ind == _ms_size) { 2922 gclog_or_tty->print_cr("Mark stack is full."); 2923 return false; 2924 } 2925 _ms[_ms_ind] = obj; 2926 if (obj->is_objArray()) { 2927 _array_ind_stack[_ms_ind] = arr_ind; 2928 } 2929 _ms_ind++; 2930 return true; 2931 } 2932 2933 oop pop() { 2934 if (_ms_ind == 0) { 2935 return NULL; 2936 } else { 2937 _ms_ind--; 2938 return _ms[_ms_ind]; 2939 } 2940 } 2941 2942 template <class T> bool drain() { 2943 while (_ms_ind > 0) { 2944 oop obj = pop(); 2945 assert(obj != NULL, "Since index was non-zero."); 2946 if (obj->is_objArray()) { 2947 jint arr_ind = _array_ind_stack[_ms_ind]; 2948 objArrayOop aobj = objArrayOop(obj); 2949 jint len = aobj->length(); 2950 jint next_arr_ind = arr_ind + _array_increment; 2951 if (next_arr_ind < len) { 2952 push(obj, next_arr_ind); 2953 } 2954 // Now process this portion of this one. 2955 int lim = MIN2(next_arr_ind, len); 2956 for (int j = arr_ind; j < lim; j++) { 2957 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j)); 2958 } 2959 2960 } else { 2961 obj->oop_iterate(this); 2962 } 2963 if (abort()) return false; 2964 } 2965 return true; 2966 } 2967 2968 public: 2969 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : 2970 _g1h(G1CollectedHeap::heap()), 2971 _cm(cm), 2972 _bm(cm->nextMarkBitMap()), 2973 _ms_size(ms_size), _ms_ind(0), 2974 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), 2975 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), 2976 _array_increment(MAX2(ms_size/8, 16)) 2977 {} 2978 2979 ~CSMarkOopClosure() { 2980 FREE_C_HEAP_ARRAY(oop, _ms); 2981 FREE_C_HEAP_ARRAY(jint, _array_ind_stack); 2982 } 2983 2984 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2985 virtual void do_oop( oop* p) { do_oop_work(p); } 2986 2987 template <class T> void do_oop_work(T* p) { 2988 T heap_oop = oopDesc::load_heap_oop(p); 2989 if (oopDesc::is_null(heap_oop)) return; 2990 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); 2991 if (obj->is_forwarded()) { 2992 // If the object has already been forwarded, we have to make sure 2993 // that it's marked. So follow the forwarding pointer. Note that 2994 // this does the right thing for self-forwarding pointers in the 2995 // evacuation failure case. 
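      // (In the evacuation-failure case, objects that could not be
      // copied are self-forwarded: forwardee() returns the object
      // itself, so the code below ends up marking the object in place,
      // which is exactly what we want.)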
2996       obj = obj->forwardee();
2997     }
2998     HeapRegion* hr = _g1h->heap_region_containing(obj);
2999     if (hr != NULL) {
3000       if (hr->in_collection_set()) {
3001         if (_g1h->is_obj_ill(obj)) {
3002           _bm->mark((HeapWord*)obj);
3003           if (!push(obj)) {
3004             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
3005             set_abort();
3006           }
3007         }
3008       } else {
3009         // Outside the collection set; we need to gray it
3010         _cm->deal_with_reference(obj);
3011       }
3012     }
3013   }
3014 };
3015
3016 class CSMarkBitMapClosure: public BitMapClosure {
3017   G1CollectedHeap* _g1h;
3018   CMBitMap*        _bitMap;
3019   ConcurrentMark*  _cm;
3020   CSMarkOopClosure _oop_cl;
3021 public:
3022   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
3023     _g1h(G1CollectedHeap::heap()),
3024     _bitMap(cm->nextMarkBitMap()),
3025     _oop_cl(cm, ms_size)
3026   {}
3027
3028   ~CSMarkBitMapClosure() {}
3029
3030   bool do_bit(size_t offset) {
3031     // convert offset into a HeapWord*
3032     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
3033     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
3034            "address out of range");
3035     assert(_bitMap->isMarked(addr), "tautology");
3036     oop obj = oop(addr);
3037     if (!obj->is_forwarded()) {
3038       if (!_oop_cl.push(obj)) return false;
3039       if (UseCompressedOops) {
3040         if (!_oop_cl.drain<narrowOop>()) return false;
3041       } else {
3042         if (!_oop_cl.drain<oop>()) return false;
3043       }
3044     }
3045     // Otherwise...
3046     return true;
3047   }
3048 };
3049
3050
3051 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
3052   CMBitMap* _bm;
3053   CSMarkBitMapClosure _bit_cl;
3054   enum SomePrivateConstants {
3055     MSSize = 1000
3056   };
3057   bool _completed;
3058 public:
3059   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
3060     _bm(cm->nextMarkBitMap()),
3061     _bit_cl(cm, MSSize),
3062     _completed(true)
3063   {}
3064
3065   ~CompleteMarkingInCSHRClosure() {}
3066
3067   bool doHeapRegion(HeapRegion* r) {
3068     if (!r->evacuation_failed()) {
3069       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
3070       if (!mr.is_empty()) {
3071         if (!_bm->iterate(&_bit_cl, mr)) {
3072           _completed = false;
3073           return true;
3074         }
3075       }
3076     }
3077     return false;
3078   }
3079
3080   bool completed() { return _completed; }
3081 };
3082
3083 class ClearMarksInHRClosure: public HeapRegionClosure {
3084   CMBitMap* _bm;
3085 public:
3086   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
3087
3088   bool doHeapRegion(HeapRegion* r) {
3089     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
3090       MemRegion usedMR = r->used_region();
3091       _bm->clearRange(usedMR);
3092     }
3093     return false;
3094   }
3095 };
3096
3097 void ConcurrentMark::complete_marking_in_collection_set() {
3098   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3099
3100   if (!g1h->mark_in_progress()) {
3101     g1h->g1_policy()->record_mark_closure_time(0.0);
3102     return;
3103   }
3104
3105   int i = 1;
3106   double start = os::elapsedTime();
3107   while (true) {
3108     i++;
3109     CompleteMarkingInCSHRClosure cmplt(this);
3110     g1h->collection_set_iterate(&cmplt);
3111     if (cmplt.completed()) break;
3112   }
3113   double end_time = os::elapsedTime();
3114   double elapsed_time_ms = (end_time - start) * 1000.0;
3115   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3116
3117   ClearMarksInHRClosure clr(nextMarkBitMap());
3118   g1h->collection_set_iterate(&clr);
3119 }
3120
3121 // The next two methods deal with the following optimisation. Some
3122 // objects are gray by being marked and located above the finger. If
3123 // they are copied, during an evacuation pause, below the finger then
3124 // they need to be pushed on the stack. The observation is that, if
3125 // there are no regions in the collection set located above the
3126 // finger, then the above cannot happen, hence we do not need to
3127 // explicitly gray any objects when copying them to below the
3128 // finger. The global stack will be scanned to ensure that, if it
3129 // points to objects being copied, it will update their
3130 // location. There is a tricky situation with the gray objects in
3131 // the region stack that are being copied, however. See the comment in
3132 // newCSet().
3133
3134 void ConcurrentMark::newCSet() {
3135   if (!concurrent_marking_in_progress()) {
3136     // nothing to do if marking is not in progress
3137     return;
3138   }
3139
3140   // find what the lowest finger is among the global and local fingers
3141   _min_finger = _finger;
3142   for (int i = 0; i < (int)_max_task_num; ++i) {
3143     CMTask* task = _tasks[i];
3144     HeapWord* task_finger = task->finger();
3145     if (task_finger != NULL && task_finger < _min_finger) {
3146       _min_finger = task_finger;
3147     }
3148   }
3149
3150   _should_gray_objects = false;
3151
3152   // This fixes a very subtle and frustrating bug. It might be the case
3153   // that, during an evacuation pause, heap regions that contain
3154   // objects that are gray (by being in regions contained in the
3155   // region stack) are included in the collection set. Since such gray
3156   // objects will be moved, and because it's not easy to redirect
3157   // region stack entries to point to a new location (because objects
3158   // in one region might be scattered to multiple regions after they
3159   // are copied), one option is to ensure that all marked objects
3160   // copied during a pause are pushed on the stack. Notice, however,
3161   // that this problem can only happen when the region stack is not
3162   // empty during an evacuation pause. So, we make the fix a bit less
3163   // conservative and ensure that regions are pushed on the stack,
3164   // irrespective of whether all collection set regions are below the
3165   // finger, if the region stack is not empty. This is expected to be
3166   // a rare case, so I don't think it's necessary to be smarter about it.
3167   if (!region_stack_empty() || has_aborted_regions()) {
3168     _should_gray_objects = true;
3169   }
3170 }
3171
3172 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3173   if (!concurrent_marking_in_progress()) return;
3174
3175   HeapWord* region_end = hr->end();
3176   if (region_end > _min_finger) {
3177     _should_gray_objects = true;
3178   }
3179 }
3180
3181 // Resets the region fields of active CMTasks whose values point
3182 // into the collection set.
3183 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3184   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3185   assert(parallel_marking_threads() <= _max_task_num, "sanity");
3186
3187   for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3188     CMTask* task = _tasks[i];
3189     HeapWord* task_finger = task->finger();
3190     if (task_finger != NULL) {
3191       assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3192       HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3193       if (finger_region->in_collection_set()) {
3194         // The task's current region is in the collection set.
3195         // This region will be evacuated in the current GC and
3196         // the region fields in the task will be stale.
3197         task->giveup_current_region();
3198       }
3199     }
3200   }
3201 }
3202
3203 // abandon current marking iteration due to a Full GC
3204 void ConcurrentMark::abort() {
3205   // Clear all marks to force marking thread to do nothing
3206   _nextMarkBitMap->clearAll();
3207   // Empty mark stack
3208   clear_marking_state();
3209   for (int i = 0; i < (int)_max_task_num; ++i) {
3210     _tasks[i]->clear_region_fields();
3211   }
3212   _has_aborted = true;
3213
3214   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3215   satb_mq_set.abandon_partial_marking();
3216   // This can be called either during or outside marking, we'll read
3217   // the expected_active value from the SATB queue set.
3218   satb_mq_set.set_active_all_threads(
3219                                  false, /* new active value */
3220                                  satb_mq_set.is_active() /* expected_active */);
3221 }
3222
3223 static void print_ms_time_info(const char* prefix, const char* name,
3224                                NumberSeq& ns) {
3225   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3226                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3227   if (ns.num() > 0) {
3228     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3229                            prefix, ns.sd(), ns.maximum());
3230   }
3231 }
3232
3233 void ConcurrentMark::print_summary_info() {
3234   gclog_or_tty->print_cr(" Concurrent marking:");
3235   print_ms_time_info("  ", "init marks", _init_times);
3236   print_ms_time_info("  ", "remarks", _remark_times);
3237   {
3238     print_ms_time_info("     ", "final marks", _remark_mark_times);
3239     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3240
3241   }
3242   print_ms_time_info("  ", "cleanups", _cleanup_times);
3243   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3244                          _total_counting_time,
3245                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3246                           (double)_cleanup_times.num()
3247                          : 0.0));
3248   if (G1ScrubRemSets) {
3249     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3250                            _total_rs_scrub_time,
3251                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3252                             (double)_cleanup_times.num()
3253                            : 0.0));
3254   }
3255   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3256                          (_init_times.sum() + _remark_times.sum() +
3257                           _cleanup_times.sum())/1000.0);
3258   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3259                          "(%8.2f s marking, %8.2f s counting).",
3260                          cmThread()->vtime_accum(),
3261                          cmThread()->vtime_mark_accum(),
3262                          cmThread()->vtime_count_accum());
3263 }
3264
3265 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3266   _parallel_workers->print_worker_threads_on(st);
3267 }
3268
3269 // Closures
3270 // XXX: there seems to be a lot of code duplication here;
3271 // should refactor and consolidate the shared code.
3272
3273 // (Some of the marking code below was originally adapted from the CMS
3274 // collector, which is why CMS terminology survives in places.)
3275
3276 // We take a break if someone is trying to stop the world.
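// Illustrative call-site shape (an assumption for illustration, not
// code from this file): concurrent phases interleave units of work with
// yield checks so that a safepoint request is honored promptly.
#if 0
  while (has_more_work()) {
    do_one_unit_of_work();
    if (cm->do_yield_check(worker_i)) {
      // We yielded at a safepoint; worker 0 recorded the concurrent
      // pause in the policy on the way in and out.
    }
  }
#endif // 0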
3277 bool ConcurrentMark::do_yield_check(int worker_i) { 3278 if (should_yield()) { 3279 if (worker_i == 0) { 3280 _g1h->g1_policy()->record_concurrent_pause(); 3281 } 3282 cmThread()->yield(); 3283 if (worker_i == 0) { 3284 _g1h->g1_policy()->record_concurrent_pause_end(); 3285 } 3286 return true; 3287 } else { 3288 return false; 3289 } 3290 } 3291 3292 bool ConcurrentMark::should_yield() { 3293 return cmThread()->should_yield(); 3294 } 3295 3296 bool ConcurrentMark::containing_card_is_marked(void* p) { 3297 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3298 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3299 } 3300 3301 bool ConcurrentMark::containing_cards_are_marked(void* start, 3302 void* last) { 3303 return containing_card_is_marked(start) && 3304 containing_card_is_marked(last); 3305 } 3306 3307 #ifndef PRODUCT 3308 // for debugging purposes 3309 void ConcurrentMark::print_finger() { 3310 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3311 _heap_start, _heap_end, _finger); 3312 for (int i = 0; i < (int) _max_task_num; ++i) { 3313 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3314 } 3315 gclog_or_tty->print_cr(""); 3316 } 3317 #endif 3318 3319 void CMTask::scan_object(oop obj) { 3320 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3321 3322 if (_cm->verbose_high()) { 3323 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3324 _task_id, (void*) obj); 3325 } 3326 3327 size_t obj_size = obj->size(); 3328 _words_scanned += obj_size; 3329 3330 obj->oop_iterate(_cm_oop_closure); 3331 statsOnly( ++_objs_scanned ); 3332 check_limits(); 3333 } 3334 3335 // Closure for iteration over bitmaps 3336 class CMBitMapClosure : public BitMapClosure { 3337 private: 3338 // the bitmap that is being iterated over 3339 CMBitMap* _nextMarkBitMap; 3340 ConcurrentMark* _cm; 3341 CMTask* _task; 3342 // true if we're scanning a heap region claimed by the task (so that 3343 // we move the finger along), false if we're not, i.e. currently when 3344 // scanning a heap region popped from the region stack (so that we 3345 // do not move the task finger along; it'd be a mistake if we did so). 3346 bool _scanning_heap_region; 3347 3348 public: 3349 CMBitMapClosure(CMTask *task, 3350 ConcurrentMark* cm, 3351 CMBitMap* nextMarkBitMap) 3352 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3353 3354 void set_scanning_heap_region(bool scanning_heap_region) { 3355 _scanning_heap_region = scanning_heap_region; 3356 } 3357 3358 bool do_bit(size_t offset) { 3359 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3360 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3361 assert( addr < _cm->finger(), "invariant"); 3362 3363 if (_scanning_heap_region) { 3364 statsOnly( _task->increase_objs_found_on_bitmap() ); 3365 assert(addr >= _task->finger(), "invariant"); 3366 // We move that task's local finger along. 3367 _task->move_finger_to(addr); 3368 } else { 3369 // We move the task's region finger along. 3370 _task->move_region_finger_to(addr); 3371 } 3372 3373 _task->scan_object(oop(addr)); 3374 // we only partially drain the local queue and global stack 3375 _task->drain_local_queue(true); 3376 _task->drain_global_stack(true); 3377 3378 // if the has_aborted flag has been raised, we need to bail out of 3379 // the iteration 3380 return !_task->has_aborted(); 3381 } 3382 }; 3383 3384 // Closure for iterating over objects, currently only used for 3385 // processing SATB buffers. 
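// SATB recap: under snapshot-at-the-beginning marking, mutators log the
// old value of every overwritten reference field into per-thread
// queues; completed buffers are handed to the marking threads, which
// treat each logged oop as live. Illustrative pre-write barrier shape
// (an assumed, simplified sketch; the names are not the actual G1
// barrier interface):
#if 0
static void satb_pre_write_barrier(oop* field, oop new_val) {
  oop pre_val = *field;                 // value about to be overwritten
  if (marking_is_active() && pre_val != NULL) {
    enqueue_to_satb_buffer(pre_val);    // keep the snapshot reachable
  }
  *field = new_val;
}
#endif // 0
// The closure below simply feeds each logged object to the owning
// CMTask: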
3386 class CMObjectClosure : public ObjectClosure { 3387 private: 3388 CMTask* _task; 3389 3390 public: 3391 void do_object(oop obj) { 3392 _task->deal_with_reference(obj); 3393 } 3394 3395 CMObjectClosure(CMTask* task) : _task(task) { } 3396 }; 3397 3398 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3399 ConcurrentMark* cm, 3400 CMTask* task) 3401 : _g1h(g1h), _cm(cm), _task(task) { 3402 assert(_ref_processor == NULL, "should be initialized to NULL"); 3403 3404 if (G1UseConcMarkReferenceProcessing) { 3405 _ref_processor = g1h->ref_processor(); 3406 assert(_ref_processor != NULL, "should not be NULL"); 3407 } 3408 } 3409 3410 void CMTask::setup_for_region(HeapRegion* hr) { 3411 // Separated the asserts so that we know which one fires. 3412 assert(hr != NULL, 3413 "claim_region() should have filtered out continues humongous regions"); 3414 assert(!hr->continuesHumongous(), 3415 "claim_region() should have filtered out continues humongous regions"); 3416 3417 if (_cm->verbose_low()) { 3418 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 3419 _task_id, hr); 3420 } 3421 3422 _curr_region = hr; 3423 _finger = hr->bottom(); 3424 update_region_limit(); 3425 } 3426 3427 void CMTask::update_region_limit() { 3428 HeapRegion* hr = _curr_region; 3429 HeapWord* bottom = hr->bottom(); 3430 HeapWord* limit = hr->next_top_at_mark_start(); 3431 3432 if (limit == bottom) { 3433 if (_cm->verbose_low()) { 3434 gclog_or_tty->print_cr("[%d] found an empty region " 3435 "["PTR_FORMAT", "PTR_FORMAT")", 3436 _task_id, bottom, limit); 3437 } 3438 // The region was collected underneath our feet. 3439 // We set the finger to bottom to ensure that the bitmap 3440 // iteration that will follow this will not do anything. 3441 // (this is not a condition that holds when we set the region up, 3442 // as the region is not supposed to be empty in the first place) 3443 _finger = bottom; 3444 } else if (limit >= _region_limit) { 3445 assert(limit >= _finger, "peace of mind"); 3446 } else { 3447 assert(limit < _region_limit, "only way to get here"); 3448 // This can happen under some pretty unusual circumstances. An 3449 // evacuation pause empties the region underneath our feet (NTAMS 3450 // at bottom). We then do some allocation in the region (NTAMS 3451 // stays at bottom), followed by the region being used as a GC 3452 // alloc region (NTAMS will move to top() and the objects 3453 // originally below it will be grayed). All objects now marked in 3454 // the region are explicitly grayed, if below the global finger, 3455 // and we do not need in fact to scan anything else. So, we simply 3456 // set _finger to be limit to ensure that the bitmap iteration 3457 // doesn't do anything. 3458 _finger = limit; 3459 } 3460 3461 _region_limit = limit; 3462 } 3463 3464 void CMTask::giveup_current_region() { 3465 assert(_curr_region != NULL, "invariant"); 3466 if (_cm->verbose_low()) { 3467 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 3468 _task_id, _curr_region); 3469 } 3470 clear_region_fields(); 3471 } 3472 3473 void CMTask::clear_region_fields() { 3474 // Values for these three fields that indicate that we're not 3475 // holding on to a region. 
void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region = NULL;
  _finger = NULL;
  _region_limit = NULL;

  _region_finger = NULL;
}

void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] resetting", _task_id);
  }

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();
  assert(_aborted_region.is_empty(), "should have been cleared");

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _local_pushes = 0;
  _local_pops = 0;
  _local_max_size = 0;
  _objs_scanned = 0;
  _global_pushes = 0;
  _global_pops = 0;
  _global_max_size = 0;
  _global_transfers_to = 0;
  _global_transfers_from = 0;
  _region_stack_pops = 0;
  _regions_claimed = 0;
  _objs_found_on_bitmap = 0;
  _satb_buffers_processed = 0;
  _steal_attempts = 0;
  _steals = 0;
  _aborted = 0;
  _aborted_overflow = 0;
  _aborted_cm_aborted = 0;
  _aborted_yield = 0;
  _aborted_timed_out = 0;
  _aborted_satb = 0;
  _aborted_termination = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}
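
// Sketch of the work-based clock scheme that reached_limit() is part of
// (illustrative only; the actual check is CMTask::check_limits(), called
// from scan_object() above and presumably inlined in the header): counters
// bumped on every unit of work are compared against precomputed limits, so
// the expensive clock is consulted only every few thousand operations
// rather than once per object:
//
//   void check_limits() {
//     if (_words_scanned >= _words_scanned_limit ||
//         _refs_reached  >= _refs_reached_limit) {
//       reached_limit();   // which in turn calls regular_clock_call()
//     }
//   }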
void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    }
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit = _real_words_scanned_limit;

  _real_refs_reached_limit = _refs_reached + refs_reached_period;
  _refs_reached_limit = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}
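
// Worked example of the two methods above (using a hypothetical period of
// 12*1024 words): recalculate_limits() sets
//   _words_scanned_limit = _words_scanned + 12*1024
// while a subsequent decrease_limits() pulls it back to
//   _real_words_scanned_limit - 3*(12*1024)/4
// so the next clock call fires after at most ~3*1024 further words are
// scanned, instead of the full 12*1024.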
void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
                               _task_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}
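
// Design note on the two transfer methods above: entries move between the
// local queue and the global stack in chunks of global_stack_transfer_size,
// amortising the synchronisation cost of the shared stack over many
// entries. The core pattern, reduced to a sketch with hypothetical names:
//
//   oop buf[CHUNK];
//   int n = 0;
//   while (n < CHUNK && local_queue.pop(buf[n])) n++;  // batch up locally...
//   if (n > 0) global_stack.push_batch(buf, n);        // ...one shared op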
void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
                             _task_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
                             _task_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
                             _task_id, _cm->mark_stack_size());
    }
  }
}
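
// Worked example for the drain targets above (hypothetical sizes): with
// max_elems() == 16*1024 and GCDrainStackTargetSize == 64, a partial drain
// of the local queue stops once it holds
//   MIN2(16*1024 / 3, 64) == 64
// entries, deliberately leaving work available for other tasks to steal;
// a total drain (partially == false) runs the queue down to empty.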
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
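
// The claim-and-apply loops above, reduced to their core (an illustrative
// sketch mirroring the non-parallel branch):
//
//   CMObjectClosure oc(task);
//   satb_mq_set.set_closure(&oc);
//   while (!task->has_aborted() &&
//          satb_mq_set.apply_closure_to_completed_buffer()) {
//     task->regular_clock_call();  // keep checking the abort conditions
//   }
//   satb_mq_set.set_closure(NULL);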
void CMTask::drain_region_stack(BitMapClosure* bc) {
  if (has_aborted()) return;

  assert(_region_finger == NULL,
         "it should be NULL when we're not scanning a region");

  if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
                             _task_id, _cm->region_stack_size());
    }

    MemRegion mr;

    if (!_aborted_region.is_empty()) {
      mr = _aborted_region;
      _aborted_region = MemRegion();

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] scanning aborted region "
                               "[ " PTR_FORMAT ", " PTR_FORMAT " )",
                               _task_id, mr.start(), mr.end());
      }
    } else {
      mr = _cm->region_stack_pop_lock_free();
      // it returns MemRegion() if the pop fails
      statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
    }

    while (mr.start() != NULL) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] we are scanning region "
                               "["PTR_FORMAT", "PTR_FORMAT")",
                               _task_id, mr.start(), mr.end());
      }

      assert(mr.end() <= _cm->finger(),
             "otherwise the region shouldn't be on the stack");
      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
      if (_nextMarkBitMap->iterate(bc, mr)) {
        assert(!has_aborted(),
               "cannot abort the task without aborting the bitmap iteration");

        // We finished iterating over the region without aborting.
        regular_clock_call();
        if (has_aborted()) {
          mr = MemRegion();
        } else {
          mr = _cm->region_stack_pop_lock_free();
          // it returns MemRegion() if the pop fails
          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
        }
      } else {
        assert(has_aborted(), "currently the only way to do so");

        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _region_finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _region_finger to something non-null.
        assert(_region_finger != NULL, "invariant");

        // Make sure that any previously aborted region has been
        // cleared.
        assert(_aborted_region.is_empty(), "aborted region not cleared");

        // The iteration was actually aborted. So now _region_finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        MemRegion newRegion =
          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());

        if (!newRegion.is_empty()) {
          if (_cm->verbose_low()) {
            gclog_or_tty->print_cr("[%d] recording unscanned region "
                                   "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
                                   _task_id,
                                   newRegion.start(), newRegion.end());
          }
          // Now record the part of the region we didn't scan to
          // make sure this task scans it later.
          _aborted_region = newRegion;
        }
        // break from while
        mr = MemRegion();
      }
      _region_finger = NULL;
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
                             _task_id, _cm->region_stack_size());
    }
  }
}
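
// Illustrative arithmetic for the restart point above: nextWord(addr)
// advances addr by the bitmap granularity, i.e. HeapWordSize << _shifter
// bytes (with a shifter of 0 -- a hypothetical value -- exactly one heap
// word). That is the smallest possible gap to the next object header, so
// the object at _region_finger itself is excluded from newRegion and will
// not be rescanned when the task restarts.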
gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 4030 _aborted_timed_out, _aborted_satb, _aborted_termination); 4031 #endif // _MARKING_STATS_ 4032 } 4033 4034 /***************************************************************************** 4035 4036 The do_marking_step(time_target_ms) method is the building block 4037 of the parallel marking framework. It can be called in parallel 4038 with other invocations of do_marking_step() on different tasks 4039 (but only one per task, obviously) and concurrently with the 4040 mutator threads, or during remark, hence it eliminates the need 4041 for two versions of the code. When called during remark, it will 4042 pick up from where the task left off during the concurrent marking 4043 phase. Interestingly, tasks are also claimable during evacuation 4044 pauses too, since do_marking_step() ensures that it aborts before 4045 it needs to yield. 4046 4047 The data structures that is uses to do marking work are the 4048 following: 4049 4050 (1) Marking Bitmap. If there are gray objects that appear only 4051 on the bitmap (this happens either when dealing with an overflow 4052 or when the initial marking phase has simply marked the roots 4053 and didn't push them on the stack), then tasks claim heap 4054 regions whose bitmap they then scan to find gray objects. A 4055 global finger indicates where the end of the last claimed region 4056 is. A local finger indicates how far into the region a task has 4057 scanned. The two fingers are used to determine how to gray an 4058 object (i.e. whether simply marking it is OK, as it will be 4059 visited by a task in the future, or whether it needs to be also 4060 pushed on a stack). 4061 4062 (2) Local Queue. The local queue of the task which is accessed 4063 reasonably efficiently by the task. Other tasks can steal from 4064 it when they run out of work. Throughout the marking phase, a 4065 task attempts to keep its local queue short but not totally 4066 empty, so that entries are available for stealing by other 4067 tasks. Only when there is no more work, a task will totally 4068 drain its local queue. 4069 4070 (3) Global Mark Stack. This handles local queue overflow. During 4071 marking only sets of entries are moved between it and the local 4072 queues, as access to it requires a mutex and more fine-grain 4073 interaction with it which might cause contention. If it 4074 overflows, then the marking phase should restart and iterate 4075 over the bitmap to identify gray objects. Throughout the marking 4076 phase, tasks attempt to keep the global mark stack at a small 4077 length but not totally empty, so that entries are available for 4078 popping by other tasks. Only when there is no more work, tasks 4079 will totally drain the global mark stack. 4080 4081 (4) Global Region Stack. Entries on it correspond to areas of 4082 the bitmap that need to be scanned since they contain gray 4083 objects. Pushes on the region stack only happen during 4084 evacuation pauses and typically correspond to areas covered by 4085 GC LABS. If it overflows, then the marking phase should restart 4086 and iterate over the bitmap to identify gray objects. Tasks will 4087 try to totally drain the region stack as soon as possible. 4088 4089 (5) SATB Buffer Queue. This is where completed SATB buffers are 4090 made available. Buffers are regularly removed from this queue 4091 and scanned for roots, so that the queue doesn't get too 4092 long. 
      During remark, all completed buffers are processed, as
      well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (either on the global stack or the
      region stack) has been triggered. Before the task aborts, it
      will actually sync up with the other tasks to ensure that all
      the marking data structures (local queues, stacks, fingers etc.)
      are re-initialised so that when do_marking_step() completes,
      the marking phase can immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. When we have to yield, we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding, we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reach a given limit. Additional invocations of
    the clock method have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
    vtime too regularly, as it is quite expensive. So, once it was in
    place, it was natural to piggy-back all the other conditions on it
    too and not constantly check them throughout the code.

 *****************************************************************************/
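
// Illustrative sketch (not HotSpot code) of the calling convention the
// comment above describes -- do_marking_step() is invoked repeatedly until
// it completes without aborting; the target time and flag values here are
// hypothetical:
//
//   do {
//     task->do_marking_step(10.0 /* ms */,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     // on abort: yield, sync up on the barriers, or process SATB
//     // buffers, as dictated by the abort reason
//   } while (task->has_aborted() && !cm->has_aborted());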
void CMTask::do_marking_step(double time_target_ms,
                             bool do_stealing,
                             bool do_termination) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  assert(concurrent() || _cm->region_stack_empty(),
         "the region stack should have been cleared before remark");
  assert(concurrent() || !_cm->has_aborted_regions(),
         "aborted regions should have been cleared before remark");
  assert(_region_finger == NULL,
         "this should be non-null only when a region is being scanned");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the region stack or the mark stack overflows
    // during a GC pause and this task, after a yield point,
    // restarts. We have to abort as we need to get into the overflow
    // protocol which happens right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Then totally drain the region stack. We will not look at
  // it again before the next invocation of this method. Entries on
  // the region stack are only added during evacuation pauses, for
  // which we have to yield. When we do, we abort the task anyway so
  // it will look at the region stack again when it restarts.
  bitmap_closure.set_scanning_heap_region(false);
  drain_region_stack(&bitmap_closure);
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);
      }

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      bitmap_closure.set_scanning_heap_region(true);
      if (mr.is_empty() ||
          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region == NULL, "invariant");
      assert(_finger == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }
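
  // The stealing loop above follows the usual work-stealing discipline:
  // try to pop an entry from a randomly chosen victim queue and, after
  // every successful steal, fully re-drain before stealing again. Reduced
  // sketch (illustrative only):
  //
  //   oop obj;
  //   while (!task->has_aborted() &&
  //          cm->try_stealing(task_id, &seed, obj)) {
  //     task->scan_object(obj);
  //     task->drain_local_queue(false);
  //     task->drain_global_stack(false);
  //   }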
  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_aborted_region.is_empty(), "only way to reach here");
      guarantee(_cm->region_stack_empty(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
      guarantee(!_cm->region_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task. The
      // caller will restart it and we can hopefully find more things to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] apparently there is more work to do",
                               _task_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
      }

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _aborted_region(MemRegion()) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            " %-4s"
#define G1PPRL_TYPE_H_FORMAT          " %4s"
#define G1PPRL_BYTE_FORMAT            " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          " %9s"
#define G1PPRL_DOUBLE_FORMAT          " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
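
// A note on how these macros compose: adjacent C string literals are
// concatenated at compile time, so a full format string is assembled from
// the pieces. For example (illustrative expansion only):
//
//   G1PPRL_LINE_PREFIX G1PPRL_TYPE_FORMAT G1PPRL_BYTE_FORMAT
//     ==> "###" " %-4s" " "SIZE_FORMAT_W(9)
//
// which the preprocessor merges into a single literal passed to print_cr().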
G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
}

// It takes as a parameter a reference to one of the _hum_* fields; it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}
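
// Worked example for the two helpers above (hypothetical 1 MB regions, and
// assuming the "starts humongous" region's used() covers the whole series):
// a 2.5 MB humongous object spans three regions. The first visit primes
// _hum_used_bytes with the series total, and the three region visits then
// report
//   min(1 MB, 2.5 MB) = 1 MB, min(1 MB, 1.5 MB) = 1 MB, min(1 MB, 0.5 MB) = 0.5 MB
// respectively, attributing the object's footprint region by region until
// the _hum_* fields drain back to zero.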
bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = "";
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes = capacity_bytes;
    _hum_used_bytes = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes += used_bytes;
  _total_capacity_bytes += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes));
  _out->cr();
}