/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ErgoVerbose.hpp"
#include "gc/g1/g1Log.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize  = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
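  // Each bit in the map covers (1 << _shifter) heap words, so object
  // starts are only recorded at that granularity; rounding up keeps the
  // returned address from falling below 'addr'.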
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
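  // Regions are cleared in 1 MB chunks (see doHeapRegion() below) so
  // that a yielding closure can respond to safepoint and abort requests
  // between chunks.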
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer     _hrclaimer;
  bool                  _suspendible; // If the task is suspendible, workers must join the STS.

 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _suspendible(suspendible), _hrclaimer(n_workers) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  // Clamp mr to the bitmap's range; intersection() returns the result.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue with the current capacity
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
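  // We hold ParGCRareEvent_lock for the whole push, so it is safe to
  // publish the new index before copying the elements into the newly
  // claimed slots.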
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock, so there is nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h,
                               G1RegionToSpaceMapper* prev_bitmap_storage,
                               G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
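  // _markBitMap1 and _markBitMap2 alternate between the "previous" and
  // "next" marking roles: marking always writes into the next bitmap,
  // and the two are swapped once a marking cycle completes.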
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
                                              (double) os::processor_count();
    double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor             = sleep_factor;
    _marking_task_overhead    = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
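    // scale_parallel_threads() above maps n parallel GC threads to
    // MAX2((n + 2) / 4, 1U), i.e. roughly one marking thread for every
    // four parallel GC threads.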
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
                   (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
  gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
  gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
  gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
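    // Only user-specified (command line) values are checked here; the
    // ergonomic default computed in the branch above has already been
    // range-checked.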
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                                CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // We can use the region's orig_end() for the limit and the comparison value
    // as it always contains the "real" end of the region that never changes and
    // has no side effects.
    // Due to the latter, there can also be no problem with the compiler generating
    // reloads of the orig_end() call.
    HeapWord* end = r->orig_end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->is_continues_humongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count will decrease at
  // every remark and we'll eventually not need to force one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or an evacuation
 * pause could occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp();
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  bool barrier_aborted;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_second_overflow_barrier_sync.enter();
  }

  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  double scan_start = os::elapsedTime();

  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
    }

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// Helper class to get rid of some boilerplate code.
class G1CMTraceTime : public GCTraceTime {
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

 public:
  G1CMTraceTime(const char* title, bool doit)
    : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
  }
};

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      G1CMTraceTime trace("GC aggregate-data", G1Log::finer());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->is_continues_humongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    if (!hr->is_starts_humongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
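// It independently recomputes, from the next marking bitmap, the
// per-region marked byte count and the expected region/card liveness
// bitmaps, so that the data aggregated during marking can be
// cross-checked against them.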
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrm_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrm_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

  HeapRegionClaimer _hrclaimer;

 public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top)
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region
// containing live data in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
 public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
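      // Illustrative example (assuming the usual 512-byte cards, i.e.
      // 1 << CardTableModRefBS::card_shift): if ntams falls at the
      // start of card 10 and top points partway into card 13, then
      // start_idx == 10 and end_idx == 13; because top is not card
      // aligned, end_idx is bumped to 14 below so that the half-open
      // range [10, 14) covers every card spanned by [ntams, top).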
1712 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1713 // end of object is not card aligned - increment to cover 1714 // all the cards spanned by the object 1715 end_idx += 1; 1716 } 1717 1718 assert(end_idx <= _card_bm->size(), 1719 err_msg("oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1720 end_idx, _card_bm->size())); 1721 assert(start_idx < _card_bm->size(), 1722 err_msg("oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1723 start_idx, _card_bm->size())); 1724 1725 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1726 } 1727 1728 // Set the bit for the region if it contains live data 1729 if (hr->next_marked_bytes() > 0) { 1730 set_bit_for_region(hr); 1731 } 1732 1733 return false; 1734 } 1735 }; 1736 1737 class G1ParFinalCountTask: public AbstractGangTask { 1738 protected: 1739 G1CollectedHeap* _g1h; 1740 ConcurrentMark* _cm; 1741 BitMap* _actual_region_bm; 1742 BitMap* _actual_card_bm; 1743 1744 uint _n_workers; 1745 HeapRegionClaimer _hrclaimer; 1746 1747 public: 1748 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1749 : AbstractGangTask("G1 final counting"), 1750 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1751 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1752 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1753 } 1754 1755 void work(uint worker_id) { 1756 assert(worker_id < _n_workers, "invariant"); 1757 1758 FinalCountDataUpdateClosure final_update_cl(_g1h, 1759 _actual_region_bm, 1760 _actual_card_bm); 1761 1762 _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer); 1763 } 1764 }; 1765 1766 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1767 G1CollectedHeap* _g1; 1768 size_t _freed_bytes; 1769 FreeRegionList* _local_cleanup_list; 1770 HeapRegionSetCount _old_regions_removed; 1771 HeapRegionSetCount _humongous_regions_removed; 1772 HRRSCleanupTask* _hrrs_cleanup_task; 1773 1774 public: 1775 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1776 FreeRegionList* local_cleanup_list, 1777 HRRSCleanupTask* hrrs_cleanup_task) : 1778 _g1(g1), 1779 _freed_bytes(0), 1780 _local_cleanup_list(local_cleanup_list), 1781 _old_regions_removed(), 1782 _humongous_regions_removed(), 1783 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1784 1785 size_t freed_bytes() { return _freed_bytes; } 1786 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1787 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1788 1789 bool doHeapRegion(HeapRegion *hr) { 1790 if (hr->is_continues_humongous() || hr->is_archive()) { 1791 return false; 1792 } 1793 // We use a claim value of zero here because all regions 1794 // were claimed with value 1 in the FinalCount task. 
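    // A region is freed outright below only when it is in use, is not
    // young, and marking found no live bytes in it at all; a region
    // that keeps any live data instead has its remembered set queued
    // for cleanup work via do_cleanup_work().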
1795 _g1->reset_gc_time_stamps(hr); 1796 hr->note_end_of_marking(); 1797 1798 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1799 _freed_bytes += hr->used(); 1800 hr->set_containing_set(NULL); 1801 if (hr->is_humongous()) { 1802 assert(hr->is_starts_humongous(), "we should only see starts humongous"); 1803 _humongous_regions_removed.increment(1u, hr->capacity()); 1804 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1805 } else { 1806 _old_regions_removed.increment(1u, hr->capacity()); 1807 _g1->free_region(hr, _local_cleanup_list, true); 1808 } 1809 } else { 1810 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1811 } 1812 1813 return false; 1814 } 1815 }; 1816 1817 class G1ParNoteEndTask: public AbstractGangTask { 1818 friend class G1NoteEndOfConcMarkClosure; 1819 1820 protected: 1821 G1CollectedHeap* _g1h; 1822 FreeRegionList* _cleanup_list; 1823 HeapRegionClaimer _hrclaimer; 1824 1825 public: 1826 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1827 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1828 } 1829 1830 void work(uint worker_id) { 1831 FreeRegionList local_cleanup_list("Local Cleanup List"); 1832 HRRSCleanupTask hrrs_cleanup_task; 1833 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1834 &hrrs_cleanup_task); 1835 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1836 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1837 1838 // Now update the lists 1839 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1840 { 1841 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1842 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1843 1844 // If we iterate over the global cleanup list at the end of 1845 // cleanup to do this printing we will not guarantee to only 1846 // generate output for the newly-reclaimed regions (the list 1847 // might not be empty at the beginning of cleanup; we might 1848 // still be working on its previous contents). So we do the 1849 // printing here, before we append the new regions to the global 1850 // cleanup list. 
1851 1852 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1853 if (hr_printer->is_active()) { 1854 FreeRegionListIterator iter(&local_cleanup_list); 1855 while (iter.more_available()) { 1856 HeapRegion* hr = iter.get_next(); 1857 hr_printer->cleanup(hr); 1858 } 1859 } 1860 1861 _cleanup_list->add_ordered(&local_cleanup_list); 1862 assert(local_cleanup_list.is_empty(), "post-condition"); 1863 1864 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1865 } 1866 } 1867 }; 1868 1869 class G1ParScrubRemSetTask: public AbstractGangTask { 1870 protected: 1871 G1RemSet* _g1rs; 1872 BitMap* _region_bm; 1873 BitMap* _card_bm; 1874 HeapRegionClaimer _hrclaimer; 1875 1876 public: 1877 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1878 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1879 } 1880 1881 void work(uint worker_id) { 1882 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1883 } 1884 1885 }; 1886 1887 void ConcurrentMark::cleanup() { 1888 // world is stopped at this checkpoint 1889 assert(SafepointSynchronize::is_at_safepoint(), 1890 "world should be stopped"); 1891 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1892 1893 // If a full collection has happened, we shouldn't do this. 1894 if (has_aborted()) { 1895 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1896 return; 1897 } 1898 1899 g1h->verify_region_sets_optional(); 1900 1901 if (VerifyDuringGC) { 1902 HandleMark hm; // handle scope 1903 g1h->prepare_for_verify(); 1904 Universe::verify(VerifyOption_G1UsePrevMarking, 1905 " VerifyDuringGC:(before)"); 1906 } 1907 g1h->check_bitmaps("Cleanup Start"); 1908 1909 G1CollectorPolicy* g1p = g1h->g1_policy(); 1910 g1p->record_concurrent_mark_cleanup_start(); 1911 1912 double start = os::elapsedTime(); 1913 1914 HeapRegionRemSet::reset_for_cleanup_tasks(); 1915 1916 // Do counting once more with the world stopped for good measure. 1917 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1918 1919 g1h->workers()->run_task(&g1_par_count_task); 1920 1921 if (VerifyDuringGC) { 1922 // Verify that the counting data accumulated during marking matches 1923 // that calculated by walking the marking bitmap. 1924 1925 // Bitmaps to hold expected values 1926 BitMap expected_region_bm(_region_bm.size(), true); 1927 BitMap expected_card_bm(_card_bm.size(), true); 1928 1929 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1930 &_region_bm, 1931 &_card_bm, 1932 &expected_region_bm, 1933 &expected_card_bm); 1934 1935 g1h->workers()->run_task(&g1_par_verify_task); 1936 1937 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1938 } 1939 1940 size_t start_used_bytes = g1h->used(); 1941 g1h->collector_state()->set_mark_in_progress(false); 1942 1943 double count_end = os::elapsedTime(); 1944 double this_final_counting_time = (count_end - start); 1945 _total_counting_time += this_final_counting_time; 1946 1947 if (G1PrintRegionLivenessInfo) { 1948 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1949 _g1h->heap_region_iterate(&cl); 1950 } 1951 1952 // Install newly created mark bitMap as "prev". 1953 swapMarkBitMaps(); 1954 1955 g1h->reset_gc_time_stamp(); 1956 1957 uint n_workers = _g1h->workers()->active_workers(); 1958 1959 // Note end of marking in all heap regions. 
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
  g1h->workers()->run_task(&g1_par_note_end_task);
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
    g1h->workers()->run_task(&g1_par_scrub_rs_task);

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end();

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  if (G1Log::fine()) {
    g1h->g1_policy()->print_heap_transition(start_used_bytes);
  }

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(after)");
  }

  g1h->check_bitmaps("Cleanup End");

  g1h->verify_region_sets_optional();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();
  g1h->allocation_context_stats().update_after_mark();

  g1h->trace_heap_after_concurrent_cycle();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has %u entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it is not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending %u entries to the secondary_free_list, "
                               "cleanup list still has %u entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }
#ifndef PRODUCT
      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
#endif
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the CMTask associated with a worker thread (for serial reference
// processing the CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark* _cm;
  CMTask*         _task;
  int             _ref_counter_limit;
  int             _ref_counter;
  bool            _is_serial;
 public:
  G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] we're looking at location "
                               "*" PTR_FORMAT " = " PTR_FORMAT,
                               _task->worker_id(), p2i(p), p2i((void*) obj));
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call CMTask::do_marking_step() to
        // process these entries.
        //
        // We call CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false      /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the CMTask associated with a given worker thread (for serial
// reference processing the CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask*         _task;
  bool            _is_serial;
 public:
  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
                               _task->worker_id(), BOOL_TO_STR(_is_serial));
      }

      // We call CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.
2207 2208 _task->do_marking_step(1000000000.0 /* something very large */, 2209 true /* do_termination */, 2210 _is_serial); 2211 } while (_task->has_aborted() && !_cm->has_overflown()); 2212 } 2213 }; 2214 2215 // Implementation of AbstractRefProcTaskExecutor for parallel 2216 // reference processing at the end of G1 concurrent marking 2217 2218 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2219 private: 2220 G1CollectedHeap* _g1h; 2221 ConcurrentMark* _cm; 2222 WorkGang* _workers; 2223 uint _active_workers; 2224 2225 public: 2226 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2227 ConcurrentMark* cm, 2228 WorkGang* workers, 2229 uint n_workers) : 2230 _g1h(g1h), _cm(cm), 2231 _workers(workers), _active_workers(n_workers) { } 2232 2233 // Executes the given task using concurrent marking worker threads. 2234 virtual void execute(ProcessTask& task); 2235 virtual void execute(EnqueueTask& task); 2236 }; 2237 2238 class G1CMRefProcTaskProxy: public AbstractGangTask { 2239 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2240 ProcessTask& _proc_task; 2241 G1CollectedHeap* _g1h; 2242 ConcurrentMark* _cm; 2243 2244 public: 2245 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2246 G1CollectedHeap* g1h, 2247 ConcurrentMark* cm) : 2248 AbstractGangTask("Process reference objects in parallel"), 2249 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2250 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2251 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2252 } 2253 2254 virtual void work(uint worker_id) { 2255 ResourceMark rm; 2256 HandleMark hm; 2257 CMTask* task = _cm->task(worker_id); 2258 G1CMIsAliveClosure g1_is_alive(_g1h); 2259 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2260 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2261 2262 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2263 } 2264 }; 2265 2266 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2267 assert(_workers != NULL, "Need parallel worker threads."); 2268 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2269 2270 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2271 2272 // We need to reset the concurrency level before each 2273 // proxy task execution, so that the termination protocol 2274 // and overflow handling in CMTask::do_marking_step() knows 2275 // how many workers to wait for. 2276 _cm->set_concurrency(_active_workers); 2277 _workers->run_task(&proc_task_proxy); 2278 } 2279 2280 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2281 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2282 EnqueueTask& _enq_task; 2283 2284 public: 2285 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2286 AbstractGangTask("Enqueue reference objects in parallel"), 2287 _enq_task(enq_task) { } 2288 2289 virtual void work(uint worker_id) { 2290 _enq_task.work(worker_id); 2291 } 2292 }; 2293 2294 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2295 assert(_workers != NULL, "Need parallel worker threads."); 2296 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2297 2298 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2299 2300 // Not strictly necessary but... 2301 // 2302 // We need to reset the concurrency level before each 2303 // proxy task execution, so that the termination protocol 2304 // and overflow handling in CMTask::do_marking_step() knows 2305 // how many workers to wait for. 
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&enq_task_proxy);
}

void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
  G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
}

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  if (has_overflown()) {
    // Skip processing the discovered references if we have
    // overflown the global marking stack. Reference objects
    // only get discovered once so it is OK to not
    // de-populate the discovered reference lists. We could have,
    // but the only benefit would be that, when marking restarts,
    // fewer reference objects are discovered.
    return;
  }

  ResourceMark rm;
  HandleMark   hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    G1CMTraceTime t("GC ref-proc", G1Log::finer());

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Set the soft reference policy
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    // Instances of the 'Keep Alive' and 'Complete GC' closures used
    // in serial reference processing. Note these closures are also
    // used for serially processing (by the current thread) the
    // JNI references during parallel reference processing.
    //
    // These closures do not need to synchronize with the worker
    // threads involved in parallel reference processing as these
    // instances are executed serially by the current thread (e.g.
    // reference processing is not multi-threaded and is thus
    // performed by the current thread instead of a gang worker).
    //
    // The gang tasks involved in parallel reference processing create
    // their own instances of these closures, which do their own
    // synchronization among themselves.
    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);

    // We need at least one active thread. If reference processing
    // is not multi-threaded we use the current (VMThread) thread,
    // otherwise we use the work gang from the G1CollectedHeap and
    // we utilize all the worker threads we can.
    bool processing_is_mt = rp->processing_is_mt();
    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);

    // Parallel processing task executor.
    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);
    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);

    // Set the concurrency level. The phase was already set prior to
    // executing the remark task.
    set_concurrency(active_workers);

    // Set the degree of MT processing here. If the discovery was done MT,
    // the number of threads involved during discovery could differ from
    // the number of active workers.
This is OK as long as the discovered 2384 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2385 rp->set_active_mt_degree(active_workers); 2386 2387 // Process the weak references. 2388 const ReferenceProcessorStats& stats = 2389 rp->process_discovered_references(&g1_is_alive, 2390 &g1_keep_alive, 2391 &g1_drain_mark_stack, 2392 executor, 2393 g1h->gc_timer_cm()); 2394 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2395 2396 // The do_oop work routines of the keep_alive and drain_marking_stack 2397 // oop closures will set the has_overflown flag if we overflow the 2398 // global marking stack. 2399 2400 assert(_markStack.overflow() || _markStack.isEmpty(), 2401 "mark stack should be empty (unless it overflowed)"); 2402 2403 if (_markStack.overflow()) { 2404 // This should have been done already when we tried to push an 2405 // entry on to the global mark stack. But let's do it again. 2406 set_has_overflown(); 2407 } 2408 2409 assert(rp->num_q() == active_workers, "why not"); 2410 2411 rp->enqueue_discovered_references(executor); 2412 2413 rp->verify_no_references_recorded(); 2414 assert(!rp->discovery_enabled(), "Post condition"); 2415 } 2416 2417 if (has_overflown()) { 2418 // We can not trust g1_is_alive if the marking stack overflowed 2419 return; 2420 } 2421 2422 assert(_markStack.isEmpty(), "Marking should have completed"); 2423 2424 // Unload Klasses, String, Symbols, Code Cache, etc. 2425 { 2426 G1CMTraceTime trace("Unloading", G1Log::finer()); 2427 2428 if (ClassUnloadingWithConcurrentMark) { 2429 bool purged_classes; 2430 2431 { 2432 G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2433 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2434 } 2435 2436 { 2437 G1CMTraceTime trace("Parallel Unloading", G1Log::finest()); 2438 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2439 } 2440 } 2441 2442 if (G1StringDedup::is_enabled()) { 2443 G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2444 G1StringDedup::unlink(&g1_is_alive); 2445 } 2446 } 2447 } 2448 2449 void ConcurrentMark::swapMarkBitMaps() { 2450 CMBitMapRO* temp = _prevMarkBitMap; 2451 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2452 _nextMarkBitMap = (CMBitMap*) temp; 2453 } 2454 2455 // Closure for marking entries in SATB buffers. 2456 class CMSATBBufferClosure : public SATBBufferClosure { 2457 private: 2458 CMTask* _task; 2459 G1CollectedHeap* _g1h; 2460 2461 // This is very similar to CMTask::deal_with_reference, but with 2462 // more relaxed requirements for the argument, so this must be more 2463 // circumspect about treating the argument as an object. 2464 void do_entry(void* entry) const { 2465 _task->increment_refs_reached(); 2466 HeapRegion* hr = _g1h->heap_region_containing_raw(entry); 2467 if (entry < hr->next_top_at_mark_start()) { 2468 // Until we get here, we don't know whether entry refers to a valid 2469 // object; it could instead have been a stale reference. 
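      // (Entries at or above NTAMS need no SATB processing: objects
      // allocated during marking are treated as implicitly live. An
      // entry below NTAMS either refers to an object that was allocated
      // before marking started - and so is a valid oop - or the region
      // has since been freed and reset, in which case its NTAMS is at
      // bottom and the check above already filtered the entry out.)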
      oop obj = static_cast<oop>(entry);
      assert(obj->is_oop(true /* ignore mark word */),
             err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)));
      _task->make_reference_grey(obj, hr);
    }
  }

 public:
  CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};

class G1RemarkThreadsClosure : public ThreadClosure {
  CMSATBBufferClosure _cm_satb_cl;
  G1CMOopClosure _cm_cl;
  MarkingCodeBlobClosure _code_cl;
  int _thread_parity;

 public:
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
    _cm_satb_cl(task, g1h),
    _cm_cl(g1h, g1h->concurrent_mark(), task),
    _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
    _thread_parity(Threads::thread_claim_parity()) {}

  void do_thread(Thread* thread) {
    if (thread->is_Java_thread()) {
      if (thread->claim_oops_do(true, _thread_parity)) {
        JavaThread* jt = (JavaThread*)thread;

        // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking
        // however the oops reachable from nmethods have very complex lifecycles:
        // * Alive if on the stack of an executing method
        // * Weakly reachable otherwise
        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
        // live by the SATB invariant but other oops recorded in nmethods may behave differently.
        jt->nmethods_do(&_code_cl);

        jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
      }
    } else if (thread->is_VM_thread()) {
      if (thread->claim_oops_do(true, _thread_parity)) {
        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
      }
    }
  }
};

class CMRemarkTask: public AbstractGangTask {
 private:
  ConcurrentMark* _cm;
 public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if (worker_id < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      {
        ResourceMark rm;
        HandleMark hm;

        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
        Threads::threads_do(&threads_f);
      }

      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true         /* do_termination       */,
                              false        /* is_serial            */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
    AbstractGangTask("Par Remark"), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  G1CMTraceTime trace("Finalize Marking", G1Log::finer());

  g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at its
  // value originally calculated in the ConcurrentMark
  // constructor and pass values of the active workers
  // through the gang in the task.

  {
    StrongRootsScope srs(active_workers);

    CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    g1h->workers()->run_task(&remarkTask);
  }

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            err_msg("Invariant: has_overflown = %s, num buffers = %d",
                    BOOL_TO_STR(has_overflown()),
                    satb_mq_set.completed_buffers_num()));

  print_stats();
}

void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
}

void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
  _nextMarkBitMap->clearRange(mr);
}

HeapRegion*
ConcurrentMark::claim_region(uint worker_id) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // Note on how this code handles humongous regions. In the
    // normal case the finger will reach the start of a "starts
    // humongous" (SH) region. Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);

    // Above heap_region_containing_raw may return NULL as we always scan
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger && curr_region != NULL) {
      // we succeeded
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit  = curr_region->next_top_at_mark_start();

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
                               "[" PTR_FORMAT ", " PTR_FORMAT "), "
                               "limit = " PTR_FORMAT,
                               worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
      }

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] we were successful with region = "
                               PTR_FORMAT, worker_id, p2i(curr_region));
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
                                 "returning it ", worker_id, p2i(curr_region));
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
                                 "returning NULL", worker_id, p2i(curr_region));
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
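        // A caller is expected to keep retrying until it either claims
        // a non-empty region or the global finger reaches the end of
        // the heap; a rough sketch of such a retry loop (hypothetical
        // caller code, not taken from this file):
        //
        //   HeapRegion* hr;
        //   do {
        //     hr = _cm->claim_region(worker_id);
        //   } while (hr == NULL && _cm->finger() < heap_end);
        //
        // where heap_end stands in for the address one past the last
        // heap region.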
2683 return NULL; 2684 } 2685 } else { 2686 assert(_finger > finger, "the finger should have moved forward"); 2687 if (verbose_low()) { 2688 if (curr_region == NULL) { 2689 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, " 2690 "global finger = " PTR_FORMAT ", " 2691 "our finger = " PTR_FORMAT, 2692 worker_id, p2i(_finger), p2i(finger)); 2693 } else { 2694 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2695 "global finger = " PTR_FORMAT ", " 2696 "our finger = " PTR_FORMAT, 2697 worker_id, p2i(_finger), p2i(finger)); 2698 } 2699 } 2700 2701 // read it again 2702 finger = _finger; 2703 } 2704 } 2705 2706 return NULL; 2707 } 2708 2709 #ifndef PRODUCT 2710 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2711 private: 2712 G1CollectedHeap* _g1h; 2713 const char* _phase; 2714 int _info; 2715 2716 public: 2717 VerifyNoCSetOops(const char* phase, int info = -1) : 2718 _g1h(G1CollectedHeap::heap()), 2719 _phase(phase), 2720 _info(info) 2721 { } 2722 2723 void operator()(oop obj) const { 2724 guarantee(obj->is_oop(), 2725 err_msg("Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2726 p2i(obj), _phase, _info)); 2727 guarantee(!_g1h->obj_in_cs(obj), 2728 err_msg("obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2729 p2i(obj), _phase, _info)); 2730 } 2731 }; 2732 2733 void ConcurrentMark::verify_no_cset_oops() { 2734 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2735 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2736 return; 2737 } 2738 2739 // Verify entries on the global mark stack 2740 _markStack.iterate(VerifyNoCSetOops("Stack")); 2741 2742 // Verify entries on the task queues 2743 for (uint i = 0; i < _max_worker_id; ++i) { 2744 CMTaskQueue* queue = _task_queues->queue(i); 2745 queue->iterate(VerifyNoCSetOops("Queue", i)); 2746 } 2747 2748 // Verify the global finger 2749 HeapWord* global_finger = finger(); 2750 if (global_finger != NULL && global_finger < _heap_end) { 2751 // The global finger always points to a heap region boundary. We 2752 // use heap_region_containing_raw() to get the containing region 2753 // given that the global finger could be pointing to a free region 2754 // which subsequently becomes continues humongous. If that 2755 // happens, heap_region_containing() will return the bottom of the 2756 // corresponding starts humongous region and the check below will 2757 // not hold any more. 2758 // Since we always iterate over all regions, we might get a NULL HeapRegion 2759 // here. 2760 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2761 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2762 err_msg("global finger: " PTR_FORMAT " region: " HR_FORMAT, 2763 p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); 2764 } 2765 2766 // Verify the task fingers 2767 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2768 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2769 CMTask* task = _tasks[i]; 2770 HeapWord* task_finger = task->finger(); 2771 if (task_finger != NULL && task_finger < _heap_end) { 2772 // See above note on the global finger verification. 
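      // Unlike the global finger, which always sits on a region
      // boundary, a task's local finger tracks scan progress inside its
      // claimed region and may legitimately point into the middle of a
      // region, hence the weaker guarantee below.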
      HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
      guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
                !task_hr->in_collection_set(),
                err_msg("task finger: " PTR_FORMAT " region: " HR_FORMAT,
                        p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
    }
  }
}
#endif // PRODUCT

// Aggregate the counting data that was constructed concurrently
// with marking.
class AggregateCountDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;
  BitMap* _cm_card_bm;
  uint _max_worker_id;

 public:
  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
                              BitMap* cm_card_bm,
                              uint max_worker_id) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed.
      // Note that we cannot rely on their associated
      // "starts humongous" region to have their bit set to 1
      // since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* start = hr->bottom();
    HeapWord* limit = hr->next_top_at_mark_start();
    HeapWord* end = hr->end();

    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
                   "top: " PTR_FORMAT ", end: " PTR_FORMAT,
                   p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));

    assert(hr->next_marked_bytes() == 0, "Precondition");

    if (start == limit) {
      // NTAMS of this region has not been set so nothing to do.
      return false;
    }

    // 'start' should be in the heap.
    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump the card bitmap index
    // for limit so that we get all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could be actually just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
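    // For example (illustrative numbers only): with two workers, if
    // worker 0 recorded 4096 marked bytes for this region and worker 1
    // recorded 8192, the region ends up with 12288 marked bytes, and
    // each worker's card bitmap over [start_idx, limit_idx) is OR-ed
    // into the global card bitmap by the loop below.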
2852 uint hrm_index = hr->hrm_index(); 2853 size_t marked_bytes = 0; 2854 2855 for (uint i = 0; i < _max_worker_id; i += 1) { 2856 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 2857 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 2858 2859 // Fetch the marked_bytes in this region for task i and 2860 // add it to the running total for this region. 2861 marked_bytes += marked_bytes_array[hrm_index]; 2862 2863 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 2864 // into the global card bitmap. 2865 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 2866 2867 while (scan_idx < limit_idx) { 2868 assert(task_card_bm->at(scan_idx) == true, "should be"); 2869 _cm_card_bm->set_bit(scan_idx); 2870 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 2871 2872 // BitMap::get_next_one_offset() can handle the case when 2873 // its left_offset parameter is greater than its right_offset 2874 // parameter. It does, however, have an early exit if 2875 // left_offset == right_offset. So let's limit the value 2876 // passed in for left offset here. 2877 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2878 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2879 } 2880 } 2881 2882 // Update the marked bytes for this region. 2883 hr->add_to_marked_bytes(marked_bytes); 2884 2885 // Next heap region 2886 return false; 2887 } 2888 }; 2889 2890 class G1AggregateCountDataTask: public AbstractGangTask { 2891 protected: 2892 G1CollectedHeap* _g1h; 2893 ConcurrentMark* _cm; 2894 BitMap* _cm_card_bm; 2895 uint _max_worker_id; 2896 uint _active_workers; 2897 HeapRegionClaimer _hrclaimer; 2898 2899 public: 2900 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2901 ConcurrentMark* cm, 2902 BitMap* cm_card_bm, 2903 uint max_worker_id, 2904 uint n_workers) : 2905 AbstractGangTask("Count Aggregation"), 2906 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2907 _max_worker_id(max_worker_id), 2908 _active_workers(n_workers), 2909 _hrclaimer(_active_workers) { 2910 } 2911 2912 void work(uint worker_id) { 2913 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2914 2915 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2916 } 2917 }; 2918 2919 2920 void ConcurrentMark::aggregate_count_data() { 2921 uint n_workers = _g1h->workers()->active_workers(); 2922 2923 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2924 _max_worker_id, n_workers); 2925 2926 _g1h->workers()->run_task(&g1_par_agg_task); 2927 } 2928 2929 // Clear the per-worker arrays used to store the per-region counting data 2930 void ConcurrentMark::clear_all_count_data() { 2931 // Clear the global card bitmap - it will be filled during 2932 // liveness count aggregation (during remark) and the 2933 // final counting task. 2934 _card_bm.clear(); 2935 2936 // Clear the global region bitmap - it will be filled as part 2937 // of the final counting task. 
2938 _region_bm.clear(); 2939 2940 uint max_regions = _g1h->max_regions(); 2941 assert(_max_worker_id > 0, "uninitialized"); 2942 2943 for (uint i = 0; i < _max_worker_id; i += 1) { 2944 BitMap* task_card_bm = count_card_bitmap_for(i); 2945 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2946 2947 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2948 assert(marked_bytes_array != NULL, "uninitialized"); 2949 2950 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2951 task_card_bm->clear(); 2952 } 2953 } 2954 2955 void ConcurrentMark::print_stats() { 2956 if (verbose_stats()) { 2957 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2958 for (size_t i = 0; i < _active_tasks; ++i) { 2959 _tasks[i]->print_stats(); 2960 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2961 } 2962 } 2963 } 2964 2965 // abandon current marking iteration due to a Full GC 2966 void ConcurrentMark::abort() { 2967 if (!cmThread()->during_cycle() || _has_aborted) { 2968 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2969 return; 2970 } 2971 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2972 // concurrent bitmap clearing. 2973 _nextMarkBitMap->clearAll(); 2974 2975 // Note we cannot clear the previous marking bitmap here 2976 // since VerifyDuringGC verifies the objects marked during 2977 // a full GC against the previous bitmap. 2978 2979 // Clear the liveness counting data 2980 clear_all_count_data(); 2981 // Empty mark stack 2982 reset_marking_state(); 2983 for (uint i = 0; i < _max_worker_id; ++i) { 2984 _tasks[i]->clear_region_fields(); 2985 } 2986 _first_overflow_barrier_sync.abort(); 2987 _second_overflow_barrier_sync.abort(); 2988 _has_aborted = true; 2989 2990 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2991 satb_mq_set.abandon_partial_marking(); 2992 // This can be called either during or outside marking, we'll read 2993 // the expected_active value from the SATB queue set. 2994 satb_mq_set.set_active_all_threads( 2995 false, /* new active value */ 2996 satb_mq_set.is_active() /* expected_active */); 2997 2998 _g1h->trace_heap_after_concurrent_cycle(); 2999 _g1h->register_concurrent_cycle_end(); 3000 } 3001 3002 static void print_ms_time_info(const char* prefix, const char* name, 3003 NumberSeq& ns) { 3004 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3005 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3006 if (ns.num() > 0) { 3007 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3008 prefix, ns.sd(), ns.maximum()); 3009 } 3010 } 3011 3012 void ConcurrentMark::print_summary_info() { 3013 gclog_or_tty->print_cr(" Concurrent marking:"); 3014 print_ms_time_info(" ", "init marks", _init_times); 3015 print_ms_time_info(" ", "remarks", _remark_times); 3016 { 3017 print_ms_time_info(" ", "final marks", _remark_mark_times); 3018 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3019 3020 } 3021 print_ms_time_info(" ", "cleanups", _cleanup_times); 3022 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3023 _total_counting_time, 3024 (_cleanup_times.num() > 0 ? 
_total_counting_time * 1000.0 / 3025 (double)_cleanup_times.num() 3026 : 0.0)); 3027 if (G1ScrubRemSets) { 3028 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3029 _total_rs_scrub_time, 3030 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3031 (double)_cleanup_times.num() 3032 : 0.0)); 3033 } 3034 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3035 (_init_times.sum() + _remark_times.sum() + 3036 _cleanup_times.sum())/1000.0); 3037 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3038 "(%8.2f s marking).", 3039 cmThread()->vtime_accum(), 3040 cmThread()->vtime_mark_accum()); 3041 } 3042 3043 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3044 _parallel_workers->print_worker_threads_on(st); 3045 } 3046 3047 void ConcurrentMark::print_on_error(outputStream* st) const { 3048 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3049 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 3050 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3051 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3052 } 3053 3054 // We take a break if someone is trying to stop the world. 3055 bool ConcurrentMark::do_yield_check(uint worker_id) { 3056 if (SuspendibleThreadSet::should_yield()) { 3057 if (worker_id == 0) { 3058 _g1h->g1_policy()->record_concurrent_pause(); 3059 } 3060 SuspendibleThreadSet::yield(); 3061 return true; 3062 } else { 3063 return false; 3064 } 3065 } 3066 3067 #ifndef PRODUCT 3068 // for debugging purposes 3069 void ConcurrentMark::print_finger() { 3070 gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT, 3071 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3072 for (uint i = 0; i < _max_worker_id; ++i) { 3073 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3074 } 3075 gclog_or_tty->cr(); 3076 } 3077 #endif 3078 3079 // Closure for iteration over bitmaps 3080 class CMBitMapClosure : public BitMapClosure { 3081 private: 3082 // the bitmap that is being iterated over 3083 CMBitMap* _nextMarkBitMap; 3084 ConcurrentMark* _cm; 3085 CMTask* _task; 3086 3087 public: 3088 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3089 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3090 3091 bool do_bit(size_t offset) { 3092 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3093 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3094 assert( addr < _cm->finger(), "invariant"); 3095 3096 statsOnly( _task->increase_objs_found_on_bitmap() ); 3097 assert(addr >= _task->finger(), "invariant"); 3098 3099 // We move that task's local finger along. 
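    // (Advancing the local finger before scanning keeps the usual
    // marking invariant: an object that gets grayed at an address this
    // iteration has not reached yet only needs its bitmap bit set,
    // while one below the finger must also be pushed on a queue or
    // stack, since the bitmap scan will not visit it again.)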
3100 _task->move_finger_to(addr); 3101 3102 _task->scan_object(oop(addr)); 3103 // we only partially drain the local queue and global stack 3104 _task->drain_local_queue(true); 3105 _task->drain_global_stack(true); 3106 3107 // if the has_aborted flag has been raised, we need to bail out of 3108 // the iteration 3109 return !_task->has_aborted(); 3110 } 3111 }; 3112 3113 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3114 ConcurrentMark* cm, 3115 CMTask* task) 3116 : _g1h(g1h), _cm(cm), _task(task) { 3117 assert(_ref_processor == NULL, "should be initialized to NULL"); 3118 3119 if (G1UseConcMarkReferenceProcessing) { 3120 _ref_processor = g1h->ref_processor_cm(); 3121 assert(_ref_processor != NULL, "should not be NULL"); 3122 } 3123 } 3124 3125 void CMTask::setup_for_region(HeapRegion* hr) { 3126 assert(hr != NULL, 3127 "claim_region() should have filtered out NULL regions"); 3128 assert(!hr->is_continues_humongous(), 3129 "claim_region() should have filtered out continues humongous regions"); 3130 3131 if (_cm->verbose_low()) { 3132 gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT, 3133 _worker_id, p2i(hr)); 3134 } 3135 3136 _curr_region = hr; 3137 _finger = hr->bottom(); 3138 update_region_limit(); 3139 } 3140 3141 void CMTask::update_region_limit() { 3142 HeapRegion* hr = _curr_region; 3143 HeapWord* bottom = hr->bottom(); 3144 HeapWord* limit = hr->next_top_at_mark_start(); 3145 3146 if (limit == bottom) { 3147 if (_cm->verbose_low()) { 3148 gclog_or_tty->print_cr("[%u] found an empty region " 3149 "[" PTR_FORMAT ", " PTR_FORMAT ")", 3150 _worker_id, p2i(bottom), p2i(limit)); 3151 } 3152 // The region was collected underneath our feet. 3153 // We set the finger to bottom to ensure that the bitmap 3154 // iteration that will follow this will not do anything. 3155 // (this is not a condition that holds when we set the region up, 3156 // as the region is not supposed to be empty in the first place) 3157 _finger = bottom; 3158 } else if (limit >= _region_limit) { 3159 assert(limit >= _finger, "peace of mind"); 3160 } else { 3161 assert(limit < _region_limit, "only way to get here"); 3162 // This can happen under some pretty unusual circumstances. An 3163 // evacuation pause empties the region underneath our feet (NTAMS 3164 // at bottom). We then do some allocation in the region (NTAMS 3165 // stays at bottom), followed by the region being used as a GC 3166 // alloc region (NTAMS will move to top() and the objects 3167 // originally below it will be grayed). All objects now marked in 3168 // the region are explicitly grayed, if below the global finger, 3169 // and we do not need in fact to scan anything else. So, we simply 3170 // set _finger to be limit to ensure that the bitmap iteration 3171 // doesn't do anything. 3172 _finger = limit; 3173 } 3174 3175 _region_limit = limit; 3176 } 3177 3178 void CMTask::giveup_current_region() { 3179 assert(_curr_region != NULL, "invariant"); 3180 if (_cm->verbose_low()) { 3181 gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT, 3182 _worker_id, p2i(_curr_region)); 3183 } 3184 clear_region_fields(); 3185 } 3186 3187 void CMTask::clear_region_fields() { 3188 // Values for these three fields that indicate that we're not 3189 // holding on to a region. 
  _curr_region  = NULL;
  _finger       = NULL;
  _region_limit = NULL;
}

void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] resetting", _worker_id);
  }

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();

  _calls                     = 0;
  _elapsed_time_ms           = 0.0;
  _termination_time_ms       = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _aborted                = 0;
  _aborted_overflow       = 0;
  _aborted_cm_aborted     = 0;
  _aborted_yield          = 0;
  _aborted_timed_out      = 0;
  _aborted_satb           = 0;
  _aborted_termination    = 0;
  _steal_attempts         = 0;
  _steals                 = 0;
  _local_pushes           = 0;
  _local_pops             = 0;
  _local_max_size         = 0;
  _objs_scanned           = 0;
  _global_pushes          = 0;
  _global_pops            = 0;
  _global_max_size        = 0;
  _global_transfers_to    = 0;
  _global_transfers_from  = 0;
  _regions_claimed        = 0;
  _objs_found_on_bitmap   = 0;
  _satb_buffers_processed = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following:

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
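  // (The statsOnly() uses and the #if block below are expected to
  // compile to nothing unless _MARKING_STATS_ is defined.)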
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
                           "scanned = " SIZE_FORMAT "%s, refs reached = " SIZE_FORMAT "%s",
                           _worker_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
                             _worker_id);
    }
    // We do need to process SATB buffers, so we'll abort and restart
    // the marking task to do so.
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit = _refs_reached + refs_reached_period;
  _refs_reached_limit      = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per-byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.
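  //
  // For example: immediately after recalculate_limits(), the next
  // clock call would come after a full words_scanned_period of
  // scanning; pulling the limit back by 3/4 of a period (below) makes
  // it come after roughly a quarter of a period instead.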

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
                               _worker_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
                               _worker_id, n);
      }
      statsOnly( size_t tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
                             _worker_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( size_t tmp_size = (size_t)_task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
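  // For a partial drain the target is a third of the queue's capacity,
  // capped at GCDrainStackTargetSize; for a total drain it is zero.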
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
                             _worker_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
                               p2i((void*) obj));
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
                             _worker_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks, or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
                             _worker_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
                             _worker_id, _cm->mark_stack_size());
    }
  }
}

// The SATB queue set has several assumptions on whether to call the par
// or non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
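  // Note that regular_clock_call() runs after each buffer, so a task
  // that has timed out or needs to yield notices this promptly.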
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
    }
    statsOnly( ++_satb_buffers_processed );
    regular_clock_call();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // again, this was a potentially expensive operation, so decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
                         _worker_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("                transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = " SIZE_FORMAT, _regions_claimed);
  gclog_or_tty->print_cr("  SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: " SIZE_FORMAT ", due to", _aborted);
  gclog_or_tty->print_cr("    overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
  return _task_queues->steal(worker_id, hash_seed, obj);
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework.
    It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before it
    needs to yield.

    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it also needs to be
      pushed on a stack).

      (2) Local Queue. The local queue of the task which is accessed
      reasonably efficiently by the task. Other tasks can steal from
      it when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work does a task totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex, and finer-grained
      interaction with it might cause contention. If it
      overflows, then the marking phase should restart and iterate
      over the bitmap to identify gray objects. Throughout the marking
      phase, tasks attempt to keep the global mark stack at a small
      length but not totally empty, so that entries are available for
      popping by other tasks. Only when there is no more work do tasks
      totally drain the global mark stack.

      (4) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long. During remark, all completed buffers are processed, as
      well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (on the global stack) has been
      triggered. Before the task aborts, it will actually sync up with
      the other tasks to ensure that all the marking data structures
      (local queues, stacks, fingers etc.) are re-initialized so that
      when do_marking_step() completes, the marking phase can
      immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning.
      So, if enough buffers are available, the marking step aborts and
      the SATB buffers are processed at the beginning of the next
      invocation.

      (4) To yield. When we have to yield, we abort and do the yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding, we might allow a Full GC. If this
      happens, then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-millisecond intervals) throughout marking. It is this clock
    method that checks all the abort conditions which were mentioned
    above and decides when the task should abort. A work-based scheme
    is used to trigger this clock method: when the number of object
    words the marking phase has scanned or the number of references
    the marking phase has visited reaches a given limit. Additional
    invocations of the clock method have been planted in a few other
    strategic places too. The initial reason for the clock method was
    to avoid calling vtime too regularly, as it is quite expensive. So,
    once it was in place, it was natural to piggy-back all the other
    conditions on it too and not constantly check them throughout the
    code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _worker_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
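      // Either way, the part of the region still left to scan is
      // [_finger, _region_limit).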
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] we're scanning part "
                               "[" PTR_FORMAT ", " PTR_FORMAT ") "
                               "of region " HR_FORMAT,
                               _worker_id, p2i(_finger), p2i(_region_limit),
                               HR_FORMAT_PARAMS(_curr_region));
      }

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%u] we successfully claimed "
                                 "region " PTR_FORMAT,
                                 _worker_id, p2i(claimed_region));
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt to work-steal from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
                                 _worker_id, p2i((void*) obj));
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task. Its
      // caller will restart it and we can hopefully find more things
      // to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] apparently there is more work to do",
                               _worker_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
      }

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
                               _worker_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
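// For example, filtering a log for lines that start with "###" should
// recover just this liveness table.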
#define G1PPRL_LINE_PREFIX             "###"

#define G1PPRL_ADDR_BASE_FORMAT        " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT             " %-4s"
#define G1PPRL_TYPE_H_FORMAT           " %4s"
#define G1PPRL_BYTE_FORMAT             " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT           " %9s"
#define G1PPRL_DOUBLE_FORMAT           " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT         " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff",
                 "remset", "code-roots");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                 "(bytes)", "(bytes)");
}

// This takes a pointer to one of the _hum_* fields, deduces the
// corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
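// (doHeapRegion() below establishes both assumptions: it fills in the
// _hum_* fields when it visits the "starts humongous" region, and the
// regions of a humongous series are visited in address order.)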
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, p2i(bottom), p2i(end),
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                 remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // add static memory usages to remembered set sizes
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() +
                         HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
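  // The summary line comes out roughly as
  //   ### SUMMARY capacity: 1024.00 MB used: 512.00 MB / 50.00 % ...
  // (values illustrative only).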
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset")
                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes),
                 bytes_to_mb(_total_strong_code_roots_bytes));
  _out->cr();
}