/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ErgoVerbose.hpp"
#include "gc/g1/g1Log.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  CMBitMap::initialize(heap, storage->reserved());
  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield; // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer _hrclaimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void G1CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // Allocate a stack of the requisite depth.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack.
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflowed the marking stack during marking.
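  // The capacity is only doubled up to MarkStackSizeMax; if that limit
  // has already been reached, the request is (benignly) ignored.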
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity; continue with the current capacity.
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
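  // Claim slots [start, start + n) and copy the entries into the stack.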
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock, so there is nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _aborted_gc_id(GCId::undefined()),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set.
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal.
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
      (double) os::processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
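    // scale_parallel_threads() gives roughly one marking thread for
    // every four parallel GC threads, with a minimum of one.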
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
  gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
  gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
  gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in an STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // We need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed.
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in an STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread appears to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // We can use the region's orig_end() for the limit and the comparison value
    // as it always contains the "real" end of the region that never changes and
    // has no side effects.
    // Due to the latter, there can also be no problem with the compiler generating
    // reloads of the orig_end() call.
    HeapWord* end = r->orig_end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->is_continues_humongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in an STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other threads to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or an evacuation
 * pause could occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // Task 0 is responsible for clearing the global data structures.
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp(concurrent_gc_id());
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // After this, each task should reset its own data structures and
  // then go into the second barrier.
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  bool barrier_aborted;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_second_overflow_barrier_sync.enter();
  }

  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
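// Either uses the statically configured maximum or, when dynamic
// sizing of GC threads is in effect, lets AdaptiveSizePolicy choose
// a count based on the number of non-daemon application threads.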
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  double scan_start = os::elapsedTime();

  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp(concurrent_gc_id());
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
    }

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp(concurrent_gc_id());
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However, that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// Helper class to get rid of some boilerplate code.
class G1CMTraceTime : public GCTraceTime {
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

 public:
  G1CMTraceTime(const char* title, bool doit)
    : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
                  G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
  }
};

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      G1CMTraceTime trace("GC aggregate-data", G1Log::finer());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->is_continues_humongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    if (!hr->is_starts_humongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
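// It walks the given marking bitmap over [bottom, NTAMS) of each
// region, sums the sizes of the marked objects, and sets the
// corresponding bits in the supplied region and card bitmaps.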
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine.
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bitmaps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrm_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bitmaps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bitmap and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrm_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int _failures;
  bool _verbose;

  HeapRegionClaimer _hrclaimer;

 public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top)
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
 public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
1608       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1609         // end of object is not card aligned - increment to cover
1610         // all the cards spanned by the object
1611         end_idx += 1;
1612       }
1613 
1614       assert(end_idx <= _card_bm->size(),
1615              err_msg("oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1616                      end_idx, _card_bm->size()));
1617       assert(start_idx < _card_bm->size(),
1618              err_msg("oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1619                      start_idx, _card_bm->size()));
1620 
1621       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1622     }
1623 
1624     // Set the bit for the region if it contains live data
1625     if (hr->next_marked_bytes() > 0) {
1626       set_bit_for_region(hr);
1627     }
1628 
1629     return false;
1630   }
1631 };
1632 
1633 class G1ParFinalCountTask: public AbstractGangTask {
1634  protected:
1635   G1CollectedHeap* _g1h;
1636   ConcurrentMark* _cm;
1637   BitMap* _actual_region_bm;
1638   BitMap* _actual_card_bm;
1639 
1640   uint _n_workers;
1641   HeapRegionClaimer _hrclaimer;
1642 
1643  public:
1644   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1645     : AbstractGangTask("G1 final counting"),
1646       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1647       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1648       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1649   }
1650 
1651   void work(uint worker_id) {
1652     assert(worker_id < _n_workers, "invariant");
1653 
1654     FinalCountDataUpdateClosure final_update_cl(_g1h,
1655                                                 _actual_region_bm,
1656                                                 _actual_card_bm);
1657 
1658     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1659   }
1660 };
1661 
1662 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1663   G1CollectedHeap* _g1;
1664   size_t _freed_bytes;
1665   FreeRegionList* _local_cleanup_list;
1666   HeapRegionSetCount _old_regions_removed;
1667   HeapRegionSetCount _humongous_regions_removed;
1668   HRRSCleanupTask* _hrrs_cleanup_task;
1669 
1670  public:
1671   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1672                              FreeRegionList* local_cleanup_list,
1673                              HRRSCleanupTask* hrrs_cleanup_task) :
1674     _g1(g1),
1675     _freed_bytes(0),
1676     _local_cleanup_list(local_cleanup_list),
1677     _old_regions_removed(),
1678     _humongous_regions_removed(),
1679     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1680 
1681   size_t freed_bytes() { return _freed_bytes; }
1682   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1683   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1684 
1685   bool doHeapRegion(HeapRegion *hr) {
1686     if (hr->is_continues_humongous() || hr->is_archive()) {
1687       return false;
1688     }
1689     // Each region is claimed exactly once by the HeapRegionClaimer
1690     // during the parallel iteration, so it is processed by one worker only.
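    // A region is freed by the test below only if it cannot contain any
    // live data: it has been used, its maximum possible live bytes after
    // marking is zero, and it is not young (young regions are reclaimed
    // by evacuation pauses instead).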
1691 _g1->reset_gc_time_stamps(hr); 1692 hr->note_end_of_marking(); 1693 1694 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1695 _freed_bytes += hr->used(); 1696 hr->set_containing_set(NULL); 1697 if (hr->is_humongous()) { 1698 assert(hr->is_starts_humongous(), "we should only see starts humongous"); 1699 _humongous_regions_removed.increment(1u, hr->capacity()); 1700 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1701 } else { 1702 _old_regions_removed.increment(1u, hr->capacity()); 1703 _g1->free_region(hr, _local_cleanup_list, true); 1704 } 1705 } else { 1706 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1707 } 1708 1709 return false; 1710 } 1711 }; 1712 1713 class G1ParNoteEndTask: public AbstractGangTask { 1714 friend class G1NoteEndOfConcMarkClosure; 1715 1716 protected: 1717 G1CollectedHeap* _g1h; 1718 FreeRegionList* _cleanup_list; 1719 HeapRegionClaimer _hrclaimer; 1720 1721 public: 1722 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1723 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1724 } 1725 1726 void work(uint worker_id) { 1727 FreeRegionList local_cleanup_list("Local Cleanup List"); 1728 HRRSCleanupTask hrrs_cleanup_task; 1729 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1730 &hrrs_cleanup_task); 1731 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1732 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1733 1734 // Now update the lists 1735 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1736 { 1737 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1738 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1739 1740 // If we iterate over the global cleanup list at the end of 1741 // cleanup to do this printing we will not guarantee to only 1742 // generate output for the newly-reclaimed regions (the list 1743 // might not be empty at the beginning of cleanup; we might 1744 // still be working on its previous contents). So we do the 1745 // printing here, before we append the new regions to the global 1746 // cleanup list. 
1747 1748 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1749 if (hr_printer->is_active()) { 1750 FreeRegionListIterator iter(&local_cleanup_list); 1751 while (iter.more_available()) { 1752 HeapRegion* hr = iter.get_next(); 1753 hr_printer->cleanup(hr); 1754 } 1755 } 1756 1757 _cleanup_list->add_ordered(&local_cleanup_list); 1758 assert(local_cleanup_list.is_empty(), "post-condition"); 1759 1760 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1761 } 1762 } 1763 }; 1764 1765 class G1ParScrubRemSetTask: public AbstractGangTask { 1766 protected: 1767 G1RemSet* _g1rs; 1768 BitMap* _region_bm; 1769 BitMap* _card_bm; 1770 HeapRegionClaimer _hrclaimer; 1771 1772 public: 1773 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1774 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1775 } 1776 1777 void work(uint worker_id) { 1778 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1779 } 1780 1781 }; 1782 1783 void ConcurrentMark::cleanup() { 1784 // world is stopped at this checkpoint 1785 assert(SafepointSynchronize::is_at_safepoint(), 1786 "world should be stopped"); 1787 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1788 1789 // If a full collection has happened, we shouldn't do this. 1790 if (has_aborted()) { 1791 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1792 return; 1793 } 1794 1795 g1h->verify_region_sets_optional(); 1796 1797 if (VerifyDuringGC) { 1798 HandleMark hm; // handle scope 1799 g1h->prepare_for_verify(); 1800 Universe::verify(VerifyOption_G1UsePrevMarking, 1801 " VerifyDuringGC:(before)"); 1802 } 1803 g1h->check_bitmaps("Cleanup Start"); 1804 1805 G1CollectorPolicy* g1p = g1h->g1_policy(); 1806 g1p->record_concurrent_mark_cleanup_start(); 1807 1808 double start = os::elapsedTime(); 1809 1810 HeapRegionRemSet::reset_for_cleanup_tasks(); 1811 1812 // Do counting once more with the world stopped for good measure. 1813 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1814 1815 g1h->workers()->run_task(&g1_par_count_task); 1816 1817 if (VerifyDuringGC) { 1818 // Verify that the counting data accumulated during marking matches 1819 // that calculated by walking the marking bitmap. 1820 1821 // Bitmaps to hold expected values 1822 BitMap expected_region_bm(_region_bm.size(), true); 1823 BitMap expected_card_bm(_card_bm.size(), true); 1824 1825 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1826 &_region_bm, 1827 &_card_bm, 1828 &expected_region_bm, 1829 &expected_card_bm); 1830 1831 g1h->workers()->run_task(&g1_par_verify_task); 1832 1833 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1834 } 1835 1836 size_t start_used_bytes = g1h->used(); 1837 g1h->collector_state()->set_mark_in_progress(false); 1838 1839 double count_end = os::elapsedTime(); 1840 double this_final_counting_time = (count_end - start); 1841 _total_counting_time += this_final_counting_time; 1842 1843 if (G1PrintRegionLivenessInfo) { 1844 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1845 _g1h->heap_region_iterate(&cl); 1846 } 1847 1848 // Install newly created mark bitMap as "prev". 1849 swapMarkBitMaps(); 1850 1851 g1h->reset_gc_time_stamp(); 1852 1853 uint n_workers = _g1h->workers()->active_workers(); 1854 1855 // Note end of marking in all heap regions. 
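  // G1ParNoteEndTask (defined above) walks the regions in parallel: it
  // records per-region marking information, frees regions found to be
  // completely garbage into per-worker local lists, and merges those
  // lists into _cleanup_list.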
1856   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1857   g1h->workers()->run_task(&g1_par_note_end_task);
1858   g1h->check_gc_time_stamps();
1859 
1860   if (!cleanup_list_is_empty()) {
1861     // The cleanup list is not empty, so we'll have to process it
1862     // concurrently. Notify anyone else that might be wanting free
1863     // regions that there will be more free regions coming soon.
1864     g1h->set_free_regions_coming();
1865   }
1866 
1867   // The remembered set scrubbing must be done before the
1868   // record_concurrent_mark_cleanup_end() call below, since it affects
1869   // the metric by which we sort the heap regions.
1870   if (G1ScrubRemSets) {
1871     double rs_scrub_start = os::elapsedTime();
1872     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1873     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1874 
1875     double rs_scrub_end = os::elapsedTime();
1876     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1877     _total_rs_scrub_time += this_rs_scrub_time;
1878   }
1879 
1880   // this will also free any regions totally full of garbage objects,
1881   // and sort the regions.
1881   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1883 
1884   // Statistics.
1884   double end = os::elapsedTime();
1885   _cleanup_times.add((end - start) * 1000.0);
1886 
1887   if (G1Log::fine()) {
1888     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1889   }
1890 
1891   // Cleanup will have freed any regions completely full of garbage.
1892   // Update the soft reference policy with the new heap occupancy.
1893   Universe::update_heap_info_at_gc();
1894 
1895   if (VerifyDuringGC) {
1896     HandleMark hm;  // handle scope
1897     g1h->prepare_for_verify();
1898     Universe::verify(VerifyOption_G1UsePrevMarking,
1899                      " VerifyDuringGC:(after)");
1900   }
1901 
1902   g1h->check_bitmaps("Cleanup End");
1903 
1904   g1h->verify_region_sets_optional();
1905 
1906   // We need to make this be a "collection" so any collection pause that
1907   // races with it goes around and waits for completeCleanup to finish.
1908   g1h->increment_total_collections();
1909 
1910   // Clean out dead classes and update Metaspace sizes.
1911   if (ClassUnloadingWithConcurrentMark) {
1912     ClassLoaderDataGraph::purge();
1913   }
1914   MetaspaceGC::compute_new_size();
1915 
1916   // We reclaimed old regions so we should calculate the sizes to make
1917   // sure we update the old gen/space data.
1918   g1h->g1mm()->update_sizes();
1919   g1h->allocation_context_stats().update_after_mark();
1920 
1921   g1h->trace_heap_after_concurrent_cycle();
1922 }
1923 
1924 void ConcurrentMark::completeCleanup() {
1925   if (has_aborted()) return;
1926 
1927   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1928 
1929   _cleanup_list.verify_optional();
1930   FreeRegionList tmp_free_list("Tmp Free List");
1931 
1932   if (G1ConcRegionFreeingVerbose) {
1933     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1934                            "cleanup list has %u entries",
1935                            _cleanup_list.length());
1936   }
1937 
1938   // No one else should be accessing the _cleanup_list at this point,
1939   // so it is not necessary to take any locks
1940   while (!_cleanup_list.is_empty()) {
1941     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1942     assert(hr != NULL, "Got NULL from a non-empty list");
1943     hr->par_clear();
1944     tmp_free_list.add_ordered(hr);
1945 
1946     // Instead of adding one region at a time to the secondary_free_list,
1947     // we accumulate them in the local list and move them a few at a
1948     // time. This also cuts down on the number of notify_all() calls
1949     // we do during this process. We'll also append the local list when
1950     // _cleanup_list is empty (which means we just removed the last
1951     // region from the _cleanup_list).
1952     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1953         _cleanup_list.is_empty()) {
1954       if (G1ConcRegionFreeingVerbose) {
1955         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1956                                "appending %u entries to the secondary_free_list, "
1957                                "cleanup list still has %u entries",
1958                                tmp_free_list.length(),
1959                                _cleanup_list.length());
1960       }
1961 
1962       {
1963         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1964         g1h->secondary_free_list_add(&tmp_free_list);
1965         SecondaryFreeList_lock->notify_all();
1966       }
1967 #ifndef PRODUCT
1968       if (G1StressConcRegionFreeing) {
1969         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1970           os::sleep(Thread::current(), (jlong) 1, false);
1971         }
1972       }
1973 #endif
1974     }
1975   }
1976   assert(tmp_free_list.is_empty(), "post-condition");
1977 }
1978 
1979 // Supporting Object and Oop closures for reference discovery
1980 // and processing during marking
1981 
1982 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1983   HeapWord* addr = (HeapWord*)obj;
1984   return addr != NULL &&
1985          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1986 }
1987 
1988 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1989 // Uses the CMTask associated with a worker thread (for serial reference
1990 // processing the CMTask for worker 0 is used) to preserve (mark) and
1991 // trace referent objects.
1992 //
1993 // Using the CMTask and embedded local queues avoids having the worker
1994 // threads operating on the global mark stack. This reduces the risk
1995 // of overflowing the stack - which we would rather avoid at this late
1996 // stage. Also using the tasks' local queues removes the potential
1997 // of the workers interfering with each other that could occur if
1998 // operating on the global stack.
1999 
2000 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2001   ConcurrentMark* _cm;
2002   CMTask* _task;
2003   int _ref_counter_limit;
2004   int _ref_counter;
2005   bool _is_serial;
2006  public:
2007   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2008     _cm(cm), _task(task), _is_serial(is_serial),
2009     _ref_counter_limit(G1RefProcDrainInterval) {
2010     assert(_ref_counter_limit > 0, "sanity");
2011     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2012     _ref_counter = _ref_counter_limit;
2013   }
2014 
2015   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2016   virtual void do_oop( oop* p) { do_oop_work(p); }
2017 
2018   template <class T> void do_oop_work(T* p) {
2019     if (!_cm->has_overflown()) {
2020       oop obj = oopDesc::load_decode_heap_oop(p);
2021       if (_cm->verbose_high()) {
2022         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2023                                "*" PTR_FORMAT " = " PTR_FORMAT,
2024                                _task->worker_id(), p2i(p), p2i((void*) obj));
2025       }
2026 
2027       _task->deal_with_reference(obj);
2028       _ref_counter--;
2029 
2030       if (_ref_counter == 0) {
2031         // We have dealt with _ref_counter_limit references, pushing them
2032         // and objects reachable from them on to the local stack (and
2033         // possibly the global stack). Call CMTask::do_marking_step() to
2034         // process these entries.
2035         //
2036         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2037         // there's nothing more to do (i.e. we're done with the entries that
2038         // were pushed as a result of the CMTask::deal_with_reference() calls
2039         // above) or we overflow.
2040         //
2041         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2042         // flag while there may still be some work to do. (See the comment at
2043         // the beginning of CMTask::do_marking_step() for those conditions -
2044         // one of which is reaching the specified time target.) It is only
2045         // when CMTask::do_marking_step() returns without setting the
2046         // has_aborted() flag that the marking step has completed.
2047         do {
2048           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2049           _task->do_marking_step(mark_step_duration_ms,
2050                                  false /* do_termination */,
2051                                  _is_serial);
2052         } while (_task->has_aborted() && !_cm->has_overflown());
2053         _ref_counter = _ref_counter_limit;
2054       }
2055     } else {
2056       if (_cm->verbose_high()) {
2057         gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2058       }
2059     }
2060   }
2061 };
2062 
2063 // 'Drain' oop closure used by both serial and parallel reference processing.
2064 // Uses the CMTask associated with a given worker thread (for serial
2065 // reference processing the CMTask for worker 0 is used). Calls the
2066 // do_marking_step routine, with an unbelievably large timeout value,
2067 // to drain the marking data structures of the remaining entries
2068 // added by the 'keep alive' oop closure above.
2069 
2070 class G1CMDrainMarkingStackClosure: public VoidClosure {
2071   ConcurrentMark* _cm;
2072   CMTask* _task;
2073   bool _is_serial;
2074  public:
2075   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2076     _cm(cm), _task(task), _is_serial(is_serial) {
2077     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2078   }
2079 
2080   void do_void() {
2081     do {
2082       if (_cm->verbose_high()) {
2083         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2084                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2085       }
2086 
2087       // We call CMTask::do_marking_step() to completely drain the local
2088       // and global marking stacks of entries pushed by the 'keep alive'
2089       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2090       //
2091       // CMTask::do_marking_step() is called in a loop, which we'll exit
2092       // if there's nothing more to do (i.e. we've completely drained the
2093       // entries that were pushed as a result of applying the 'keep alive'
2094       // closure to the entries on the discovered ref lists) or we overflow
2095       // the global marking stack.
2096       //
2097       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2098       // flag while there may still be some work to do. (See the comment at
2099       // the beginning of CMTask::do_marking_step() for those conditions -
2100       // one of which is reaching the specified time target.) It is only
2101       // when CMTask::do_marking_step() returns without setting the
2102       // has_aborted() flag that the marking step has completed.
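      //
      // The 1,000,000,000 ms time target passed below effectively disables
      // the time-based abort in do_marking_step(), so this loop can only
      // exit once the stacks are completely drained or the global mark
      // stack has overflown.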
2103 2104 _task->do_marking_step(1000000000.0 /* something very large */, 2105 true /* do_termination */, 2106 _is_serial); 2107 } while (_task->has_aborted() && !_cm->has_overflown()); 2108 } 2109 }; 2110 2111 // Implementation of AbstractRefProcTaskExecutor for parallel 2112 // reference processing at the end of G1 concurrent marking 2113 2114 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2115 private: 2116 G1CollectedHeap* _g1h; 2117 ConcurrentMark* _cm; 2118 WorkGang* _workers; 2119 uint _active_workers; 2120 2121 public: 2122 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2123 ConcurrentMark* cm, 2124 WorkGang* workers, 2125 uint n_workers) : 2126 _g1h(g1h), _cm(cm), 2127 _workers(workers), _active_workers(n_workers) { } 2128 2129 // Executes the given task using concurrent marking worker threads. 2130 virtual void execute(ProcessTask& task); 2131 virtual void execute(EnqueueTask& task); 2132 }; 2133 2134 class G1CMRefProcTaskProxy: public AbstractGangTask { 2135 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2136 ProcessTask& _proc_task; 2137 G1CollectedHeap* _g1h; 2138 ConcurrentMark* _cm; 2139 2140 public: 2141 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2142 G1CollectedHeap* g1h, 2143 ConcurrentMark* cm) : 2144 AbstractGangTask("Process reference objects in parallel"), 2145 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2146 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2147 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2148 } 2149 2150 virtual void work(uint worker_id) { 2151 ResourceMark rm; 2152 HandleMark hm; 2153 CMTask* task = _cm->task(worker_id); 2154 G1CMIsAliveClosure g1_is_alive(_g1h); 2155 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2156 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2157 2158 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2159 } 2160 }; 2161 2162 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2163 assert(_workers != NULL, "Need parallel worker threads."); 2164 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2165 2166 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2167 2168 // We need to reset the concurrency level before each 2169 // proxy task execution, so that the termination protocol 2170 // and overflow handling in CMTask::do_marking_step() knows 2171 // how many workers to wait for. 2172 _cm->set_concurrency(_active_workers); 2173 _workers->run_task(&proc_task_proxy); 2174 } 2175 2176 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2177 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2178 EnqueueTask& _enq_task; 2179 2180 public: 2181 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2182 AbstractGangTask("Enqueue reference objects in parallel"), 2183 _enq_task(enq_task) { } 2184 2185 virtual void work(uint worker_id) { 2186 _enq_task.work(worker_id); 2187 } 2188 }; 2189 2190 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2191 assert(_workers != NULL, "Need parallel worker threads."); 2192 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2193 2194 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2195 2196 // Not strictly necessary but... 2197 // 2198 // We need to reset the concurrency level before each 2199 // proxy task execution, so that the termination protocol 2200 // and overflow handling in CMTask::do_marking_step() knows 2201 // how many workers to wait for. 
2202   _cm->set_concurrency(_active_workers);
2203   _workers->run_task(&enq_task_proxy);
2204 }
2205 
2206 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2207   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2208 }
2209 
2210 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2211   if (has_overflown()) {
2212     // Skip processing the discovered references if we have
2213     // overflown the global marking stack. Reference objects
2214     // only get discovered once so it is OK to not
2215     // de-populate the discovered reference lists. We could have,
2216     // but the only benefit would be that, when marking restarts,
2217     // fewer reference objects are discovered.
2218     return;
2219   }
2220 
2221   ResourceMark rm;
2222   HandleMark hm;
2223 
2224   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2225 
2226   // Is alive closure.
2227   G1CMIsAliveClosure g1_is_alive(g1h);
2228 
2229   // Inner scope to exclude the cleaning of the string and symbol
2230   // tables from the displayed time.
2231   {
2232     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2233 
2234     ReferenceProcessor* rp = g1h->ref_processor_cm();
2235 
2236     // See the comment in G1CollectedHeap::ref_processing_init()
2237     // about how reference processing currently works in G1.
2238 
2239     // Set the soft reference policy
2240     rp->setup_policy(clear_all_soft_refs);
2241     assert(_markStack.isEmpty(), "mark stack should be empty");
2242 
2243     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2244     // in serial reference processing. Note these closures are also
2245     // used for serially processing (by the current thread) the
2246     // JNI references during parallel reference processing.
2247     //
2248     // These closures do not need to synchronize with the worker
2249     // threads involved in parallel reference processing as these
2250     // instances are executed serially by the current thread (e.g.
2251     // reference processing is not multi-threaded and is thus
2252     // performed by the current thread instead of a gang worker).
2253     //
2254     // The gang tasks involved in parallel reference processing create
2255     // their own instances of these closures, which do their own
2256     // synchronization among themselves.
2257     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2258     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2259 
2260     // We need at least one active thread. If reference processing
2261     // is not multi-threaded we use the current (VMThread) thread,
2262     // otherwise we use the work gang from the G1CollectedHeap and
2263     // we utilize all the worker threads we can.
2264     bool processing_is_mt = rp->processing_is_mt();
2265     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2266     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2267 
2268     // Parallel processing task executor.
2269     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2270                                               g1h->workers(), active_workers);
2271     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2272 
2273     // Set the concurrency level. The phase was already set prior to
2274     // executing the remark task.
2275     set_concurrency(active_workers);
2276 
2277     // Set the degree of MT processing here. If the discovery was done MT,
2278     // the number of threads involved during discovery could differ from
2279     // the number of active workers. This is OK as long as the discovered
2280     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2281     rp->set_active_mt_degree(active_workers);
2282 
2283     // Process the weak references.
2284     const ReferenceProcessorStats& stats =
2285         rp->process_discovered_references(&g1_is_alive,
2286                                           &g1_keep_alive,
2287                                           &g1_drain_mark_stack,
2288                                           executor,
2289                                           g1h->gc_timer_cm(),
2290                                           concurrent_gc_id());
2291     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2292 
2293     // The do_oop work routines of the keep_alive and drain_marking_stack
2294     // oop closures will set the has_overflown flag if we overflow the
2295     // global marking stack.
2296 
2297     assert(_markStack.overflow() || _markStack.isEmpty(),
2298            "mark stack should be empty (unless it overflowed)");
2299 
2300     if (_markStack.overflow()) {
2301       // This should have been done already when we tried to push an
2302       // entry on to the global mark stack. But let's do it again.
2303       set_has_overflown();
2304     }
2305 
2306     assert(rp->num_q() == active_workers, "why not");
2307 
2308     rp->enqueue_discovered_references(executor);
2309 
2310     rp->verify_no_references_recorded();
2311     assert(!rp->discovery_enabled(), "Post condition");
2312   }
2313 
2314   if (has_overflown()) {
2315     // We cannot trust g1_is_alive if the marking stack overflowed
2316     return;
2317   }
2318 
2319   assert(_markStack.isEmpty(), "Marking should have completed");
2320 
2321   // Unload Klasses, String, Symbols, Code Cache, etc.
2322   {
2323     G1CMTraceTime trace("Unloading", G1Log::finer());
2324 
2325     if (ClassUnloadingWithConcurrentMark) {
2326       bool purged_classes;
2327 
2328       {
2329         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2330         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2331       }
2332 
2333       {
2334         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2335         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2336       }
2337     }
2338 
2339     if (G1StringDedup::is_enabled()) {
2340       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2341       G1StringDedup::unlink(&g1_is_alive);
2342     }
2343   }
2344 }
2345 
2346 void ConcurrentMark::swapMarkBitMaps() {
2347   CMBitMapRO* temp = _prevMarkBitMap;
2348   _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2349   _nextMarkBitMap = (CMBitMap*) temp;
2350 }
2351 
2352 // Closure for marking entries in SATB buffers.
2353 class CMSATBBufferClosure : public SATBBufferClosure {
2354  private:
2355   CMTask* _task;
2356   G1CollectedHeap* _g1h;
2357 
2358   // This is very similar to CMTask::deal_with_reference, but with
2359   // more relaxed requirements for the argument, so this must be more
2360   // circumspect about treating the argument as an object.
2361   void do_entry(void* entry) const {
2362     _task->increment_refs_reached();
2363     HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
2364     if (entry < hr->next_top_at_mark_start()) {
2365       // Until we get here, we don't know whether entry refers to a valid
2366       // object; it could instead have been a stale reference.
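      // (Entries at or above NTAMS were filtered out by the check above:
      // objects allocated since marking started are implicitly live and
      // need no explicit marking. Below NTAMS the entry must refer to a
      // real object, which the assert below double-checks.)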
2367       oop obj = static_cast<oop>(entry);
2368       assert(obj->is_oop(true /* ignore mark word */),
2369              err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)));
2370       _task->make_reference_grey(obj, hr);
2371     }
2372   }
2373 
2374  public:
2375   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2376     : _task(task), _g1h(g1h) { }
2377 
2378   virtual void do_buffer(void** buffer, size_t size) {
2379     for (size_t i = 0; i < size; ++i) {
2380       do_entry(buffer[i]);
2381     }
2382   }
2383 };
2384 
2385 class G1RemarkThreadsClosure : public ThreadClosure {
2386   CMSATBBufferClosure _cm_satb_cl;
2387   G1CMOopClosure _cm_cl;
2388   MarkingCodeBlobClosure _code_cl;
2389   int _thread_parity;
2390 
2391  public:
2392   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2393     _cm_satb_cl(task, g1h),
2394     _cm_cl(g1h, g1h->concurrent_mark(), task),
2395     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2396     _thread_parity(Threads::thread_claim_parity()) {}
2397 
2398   void do_thread(Thread* thread) {
2399     if (thread->is_Java_thread()) {
2400       if (thread->claim_oops_do(true, _thread_parity)) {
2401         JavaThread* jt = (JavaThread*)thread;
2402 
2403         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking.
2404         // However, oops reachable from nmethods have very complex lifecycles:
2405         // * Alive if on the stack of an executing method
2406         // * Weakly reachable otherwise
2407         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2408         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2409         jt->nmethods_do(&_code_cl);
2410 
2411         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2412       }
2413     } else if (thread->is_VM_thread()) {
2414       if (thread->claim_oops_do(true, _thread_parity)) {
2415         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2416       }
2417     }
2418   }
2419 };
2420 
2421 class CMRemarkTask: public AbstractGangTask {
2422  private:
2423   ConcurrentMark* _cm;
2424  public:
2425   void work(uint worker_id) {
2426     // Since all available tasks are actually started, we should
2427     // only proceed if we're supposed to be active.
2428     if (worker_id < _cm->active_tasks()) {
2429       CMTask* task = _cm->task(worker_id);
2430       task->record_start_time();
2431       {
2432         ResourceMark rm;
2433         HandleMark hm;
2434 
2435         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2436         Threads::threads_do(&threads_f);
2437       }
2438 
2439       do {
2440         task->do_marking_step(1000000000.0 /* something very large */,
2441                               true /* do_termination */,
2442                               false /* is_serial */);
2443       } while (task->has_aborted() && !_cm->has_overflown());
2444       // If we overflow, then we do not want to restart. We instead
2445       // want to abort remark and do concurrent marking again.
2446       task->record_end_time();
2447     }
2448   }
2449 
2450   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2451     AbstractGangTask("Par Remark"), _cm(cm) {
2452     _cm->terminator()->reset_for_reuse(active_workers);
2453   }
2454 };
2455 
2456 void ConcurrentMark::checkpointRootsFinalWork() {
2457   ResourceMark rm;
2458   HandleMark hm;
2459   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2460 
2461   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2462 
2463   g1h->ensure_parsability(false);
2464 
2465   // this is remark, so we'll use up all active threads
2466   uint active_workers = g1h->workers()->active_workers();
2467   set_concurrency_and_phase(active_workers, false /* concurrent */);
2468   // Leave _parallel_marking_threads at its
2469   // value originally calculated in the ConcurrentMark
2470   // constructor and pass values of the active workers
2471   // through the gang in the task.
2472 
2473   {
2474     StrongRootsScope srs(active_workers);
2475 
2476     CMRemarkTask remarkTask(this, active_workers);
2477     // We will start all available threads, even if we decide that the
2478     // active_workers will be fewer. The extra ones will just bail out
2479     // immediately.
2480     g1h->workers()->run_task(&remarkTask);
2481   }
2482 
2483   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2484   guarantee(has_overflown() ||
2485             satb_mq_set.completed_buffers_num() == 0,
2486             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2487                     BOOL_TO_STR(has_overflown()),
2488                     satb_mq_set.completed_buffers_num()));
2489 
2490   print_stats();
2491 }
2492 
2493 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2494   // Note we are overriding the read-only view of the prev map here, via
2495   // the cast.
2496   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2497 }
2498 
2499 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2500   _nextMarkBitMap->clearRange(mr);
2501 }
2502 
2503 HeapRegion*
2504 ConcurrentMark::claim_region(uint worker_id) {
2505   // "checkpoint" the finger
2506   HeapWord* finger = _finger;
2507 
2508   // _heap_end will not change underneath our feet; it only changes at
2509   // yield points.
2510   while (finger < _heap_end) {
2511     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2512 
2513     // Note on how this code handles humongous regions. In the
2514     // normal case the finger will reach the start of a "starts
2515     // humongous" (SH) region. Its end will either be the end of the
2516     // last "continues humongous" (CH) region in the sequence, or the
2517     // standard end of the SH region (if the SH is the only region in
2518     // the sequence). That way claim_region() will skip over the CH
2519     // regions. However, there is a subtle race between a CM thread
2520     // executing this method and a mutator thread doing a humongous
2521     // object allocation. The two are not mutually exclusive as the CM
2522     // thread does not need to hold the Heap_lock when it gets
2523     // here. So there is a chance that claim_region() will come across
2524     // a free region that's in the process of becoming a SH or a CH
2525     // region. In the former case, it will either
2526     //   a) Miss the update to the region's end, in which case it will
2527     //      visit every subsequent CH region, will find their bitmaps
2528     //      empty, and do nothing, or
2529     //   b) Will observe the update of the region's end (in which case
2530     //      it will skip the subsequent CH regions).
2531     // If it comes across a region that suddenly becomes CH, the
2532     // scenario will be similar to b). So, the race between
2533     // claim_region() and a humongous object allocation might force us
2534     // to do a bit of unnecessary work (due to some unnecessary bitmap
2535     // iterations) but it should not introduce any correctness issues.
2536     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2537 
2538     // The heap_region_containing_raw() call above may return NULL, as we
2539     // always scan and claim until the end of the heap. In this case, just
2540     // jump to the next region.
2540     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2541 
2542     // Is the gap between reading the finger and doing the CAS too long?
2543     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2544     if (res == finger && curr_region != NULL) {
2545       // we succeeded
2546       HeapWord* bottom = curr_region->bottom();
2547       HeapWord* limit = curr_region->next_top_at_mark_start();
2548 
2549       if (verbose_low()) {
2550         gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
2551                                "[" PTR_FORMAT ", " PTR_FORMAT "), "
2552                                "limit = " PTR_FORMAT,
2553                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2554       }
2555 
2556       // notice that _finger == end cannot be guaranteed here since
2557       // someone else might have moved the finger even further
2558       assert(_finger >= end, "the finger should have moved forward");
2559 
2560       if (verbose_low()) {
2561         gclog_or_tty->print_cr("[%u] we were successful with region = "
2562                                PTR_FORMAT, worker_id, p2i(curr_region));
2563       }
2564 
2565       if (limit > bottom) {
2566         if (verbose_low()) {
2567           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
2568                                  "returning it ", worker_id, p2i(curr_region));
2569         }
2570         return curr_region;
2571       } else {
2572         assert(limit == bottom,
2573                "the region limit should be at bottom");
2574         if (verbose_low()) {
2575           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
2576                                  "returning NULL", worker_id, p2i(curr_region));
2577         }
2578         // we return NULL and the caller should try calling
2579         // claim_region() again.
2580 return NULL; 2581 } 2582 } else { 2583 assert(_finger > finger, "the finger should have moved forward"); 2584 if (verbose_low()) { 2585 if (curr_region == NULL) { 2586 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, " 2587 "global finger = " PTR_FORMAT ", " 2588 "our finger = " PTR_FORMAT, 2589 worker_id, p2i(_finger), p2i(finger)); 2590 } else { 2591 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 2592 "global finger = " PTR_FORMAT ", " 2593 "our finger = " PTR_FORMAT, 2594 worker_id, p2i(_finger), p2i(finger)); 2595 } 2596 } 2597 2598 // read it again 2599 finger = _finger; 2600 } 2601 } 2602 2603 return NULL; 2604 } 2605 2606 #ifndef PRODUCT 2607 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2608 private: 2609 G1CollectedHeap* _g1h; 2610 const char* _phase; 2611 int _info; 2612 2613 public: 2614 VerifyNoCSetOops(const char* phase, int info = -1) : 2615 _g1h(G1CollectedHeap::heap()), 2616 _phase(phase), 2617 _info(info) 2618 { } 2619 2620 void operator()(oop obj) const { 2621 guarantee(obj->is_oop(), 2622 err_msg("Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2623 p2i(obj), _phase, _info)); 2624 guarantee(!_g1h->obj_in_cs(obj), 2625 err_msg("obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2626 p2i(obj), _phase, _info)); 2627 } 2628 }; 2629 2630 void ConcurrentMark::verify_no_cset_oops() { 2631 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2632 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2633 return; 2634 } 2635 2636 // Verify entries on the global mark stack 2637 _markStack.iterate(VerifyNoCSetOops("Stack")); 2638 2639 // Verify entries on the task queues 2640 for (uint i = 0; i < _max_worker_id; ++i) { 2641 CMTaskQueue* queue = _task_queues->queue(i); 2642 queue->iterate(VerifyNoCSetOops("Queue", i)); 2643 } 2644 2645 // Verify the global finger 2646 HeapWord* global_finger = finger(); 2647 if (global_finger != NULL && global_finger < _heap_end) { 2648 // The global finger always points to a heap region boundary. We 2649 // use heap_region_containing_raw() to get the containing region 2650 // given that the global finger could be pointing to a free region 2651 // which subsequently becomes continues humongous. If that 2652 // happens, heap_region_containing() will return the bottom of the 2653 // corresponding starts humongous region and the check below will 2654 // not hold any more. 2655 // Since we always iterate over all regions, we might get a NULL HeapRegion 2656 // here. 2657 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2658 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2659 err_msg("global finger: " PTR_FORMAT " region: " HR_FORMAT, 2660 p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); 2661 } 2662 2663 // Verify the task fingers 2664 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2665 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2666 CMTask* task = _tasks[i]; 2667 HeapWord* task_finger = task->finger(); 2668 if (task_finger != NULL && task_finger < _heap_end) { 2669 // See above note on the global finger verification. 
2670       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2671       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2672                 !task_hr->in_collection_set(),
2673                 err_msg("task finger: " PTR_FORMAT " region: " HR_FORMAT,
2674                         p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
2675     }
2676   }
2677 }
2678 #endif // PRODUCT
2679 
2680 // Aggregate the counting data that was constructed concurrently
2681 // with marking.
2682 class AggregateCountDataHRClosure: public HeapRegionClosure {
2683   G1CollectedHeap* _g1h;
2684   ConcurrentMark* _cm;
2685   CardTableModRefBS* _ct_bs;
2686   BitMap* _cm_card_bm;
2687   uint _max_worker_id;
2688 
2689  public:
2690   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2691                               BitMap* cm_card_bm,
2692                               uint max_worker_id) :
2693     _g1h(g1h), _cm(g1h->concurrent_mark()),
2694     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2695     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2696 
2697   bool doHeapRegion(HeapRegion* hr) {
2698     if (hr->is_continues_humongous()) {
2699       // We will ignore these here and process them when their
2700       // associated "starts humongous" region is processed.
2701       // Note that we cannot rely on their associated
2702       // "starts humongous" region to have its bit set to 1
2703       // since, due to the region chunking in the parallel region
2704       // iteration, a "continues humongous" region might be visited
2705       // before its associated "starts humongous".
2706       return false;
2707     }
2708 
2709     HeapWord* start = hr->bottom();
2710     HeapWord* limit = hr->next_top_at_mark_start();
2711     HeapWord* end = hr->end();
2712 
2713     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2714            err_msg("Preconditions not met - "
2715                    "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2716                    "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2717                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
2718 
2719     assert(hr->next_marked_bytes() == 0, "Precondition");
2720 
2721     if (start == limit) {
2722       // NTAMS of this region has not been set so nothing to do.
2723       return false;
2724     }
2725 
2726     // 'start' should be in the heap.
2727     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2728     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2729     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2730 
2731     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2732     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2733     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2734 
2735     // If ntams is not card aligned then we bump the card bitmap index
2736     // for limit so that we get all the cards spanned by
2737     // the object ending at ntams.
2738     // Note: if this is the last region in the heap then ntams
2739     // could be actually just beyond the end of the heap;
2740     // limit_idx will then correspond to a (non-existent) card
2741     // that is also outside the heap.
2742     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2743       limit_idx += 1;
2744     }
2745 
2746     assert(limit_idx <= end_idx, "or else use atomics");
2747 
2748     // Aggregate the "stripe" in the count data associated with hr.
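    // Conceptually, for this region the loop below computes
    //   hr->marked_bytes   = sum over all workers w of marked_bytes[w][region]
    //   global card bitmap = union over all workers w of the bits worker w
    //                        set in [start_idx, limit_idx)
    // using each worker's private counting structures.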
2749 uint hrm_index = hr->hrm_index(); 2750 size_t marked_bytes = 0; 2751 2752 for (uint i = 0; i < _max_worker_id; i += 1) { 2753 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 2754 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 2755 2756 // Fetch the marked_bytes in this region for task i and 2757 // add it to the running total for this region. 2758 marked_bytes += marked_bytes_array[hrm_index]; 2759 2760 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 2761 // into the global card bitmap. 2762 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 2763 2764 while (scan_idx < limit_idx) { 2765 assert(task_card_bm->at(scan_idx) == true, "should be"); 2766 _cm_card_bm->set_bit(scan_idx); 2767 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 2768 2769 // BitMap::get_next_one_offset() can handle the case when 2770 // its left_offset parameter is greater than its right_offset 2771 // parameter. It does, however, have an early exit if 2772 // left_offset == right_offset. So let's limit the value 2773 // passed in for left offset here. 2774 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2775 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2776 } 2777 } 2778 2779 // Update the marked bytes for this region. 2780 hr->add_to_marked_bytes(marked_bytes); 2781 2782 // Next heap region 2783 return false; 2784 } 2785 }; 2786 2787 class G1AggregateCountDataTask: public AbstractGangTask { 2788 protected: 2789 G1CollectedHeap* _g1h; 2790 ConcurrentMark* _cm; 2791 BitMap* _cm_card_bm; 2792 uint _max_worker_id; 2793 uint _active_workers; 2794 HeapRegionClaimer _hrclaimer; 2795 2796 public: 2797 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2798 ConcurrentMark* cm, 2799 BitMap* cm_card_bm, 2800 uint max_worker_id, 2801 uint n_workers) : 2802 AbstractGangTask("Count Aggregation"), 2803 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2804 _max_worker_id(max_worker_id), 2805 _active_workers(n_workers), 2806 _hrclaimer(_active_workers) { 2807 } 2808 2809 void work(uint worker_id) { 2810 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2811 2812 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2813 } 2814 }; 2815 2816 2817 void ConcurrentMark::aggregate_count_data() { 2818 uint n_workers = _g1h->workers()->active_workers(); 2819 2820 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2821 _max_worker_id, n_workers); 2822 2823 _g1h->workers()->run_task(&g1_par_agg_task); 2824 } 2825 2826 // Clear the per-worker arrays used to store the per-region counting data 2827 void ConcurrentMark::clear_all_count_data() { 2828 // Clear the global card bitmap - it will be filled during 2829 // liveness count aggregation (during remark) and the 2830 // final counting task. 2831 _card_bm.clear(); 2832 2833 // Clear the global region bitmap - it will be filled as part 2834 // of the final counting task. 
2835 _region_bm.clear(); 2836 2837 uint max_regions = _g1h->max_regions(); 2838 assert(_max_worker_id > 0, "uninitialized"); 2839 2840 for (uint i = 0; i < _max_worker_id; i += 1) { 2841 BitMap* task_card_bm = count_card_bitmap_for(i); 2842 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2843 2844 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2845 assert(marked_bytes_array != NULL, "uninitialized"); 2846 2847 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2848 task_card_bm->clear(); 2849 } 2850 } 2851 2852 void ConcurrentMark::print_stats() { 2853 if (verbose_stats()) { 2854 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2855 for (size_t i = 0; i < _active_tasks; ++i) { 2856 _tasks[i]->print_stats(); 2857 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2858 } 2859 } 2860 } 2861 2862 // abandon current marking iteration due to a Full GC 2863 void ConcurrentMark::abort() { 2864 if (!cmThread()->during_cycle() || _has_aborted) { 2865 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2866 return; 2867 } 2868 2869 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2870 // concurrent bitmap clearing. 2871 _nextMarkBitMap->clearAll(); 2872 2873 // Note we cannot clear the previous marking bitmap here 2874 // since VerifyDuringGC verifies the objects marked during 2875 // a full GC against the previous bitmap. 2876 2877 // Clear the liveness counting data 2878 clear_all_count_data(); 2879 // Empty mark stack 2880 reset_marking_state(); 2881 for (uint i = 0; i < _max_worker_id; ++i) { 2882 _tasks[i]->clear_region_fields(); 2883 } 2884 _first_overflow_barrier_sync.abort(); 2885 _second_overflow_barrier_sync.abort(); 2886 _aborted_gc_id = _g1h->gc_tracer_cm()->gc_id(); 2887 assert(!_aborted_gc_id.is_undefined(), "ConcurrentMark::abort() executed more than once?"); 2888 _has_aborted = true; 2889 2890 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2891 satb_mq_set.abandon_partial_marking(); 2892 // This can be called either during or outside marking, we'll read 2893 // the expected_active value from the SATB queue set. 2894 satb_mq_set.set_active_all_threads( 2895 false, /* new active value */ 2896 satb_mq_set.is_active() /* expected_active */); 2897 2898 _g1h->trace_heap_after_concurrent_cycle(); 2899 _g1h->register_concurrent_cycle_end(); 2900 } 2901 2902 const GCId& ConcurrentMark::concurrent_gc_id() { 2903 if (has_aborted()) { 2904 return _aborted_gc_id; 2905 } 2906 return _g1h->gc_tracer_cm()->gc_id(); 2907 } 2908 2909 static void print_ms_time_info(const char* prefix, const char* name, 2910 NumberSeq& ns) { 2911 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2912 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2913 if (ns.num() > 0) { 2914 gclog_or_tty->print_cr("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 2915 prefix, ns.sd(), ns.maximum()); 2916 } 2917 } 2918 2919 void ConcurrentMark::print_summary_info() { 2920 gclog_or_tty->print_cr(" Concurrent marking:"); 2921 print_ms_time_info(" ", "init marks", _init_times); 2922 print_ms_time_info(" ", "remarks", _remark_times); 2923 { 2924 print_ms_time_info(" ", "final marks", _remark_mark_times); 2925 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2926 2927 } 2928 print_ms_time_info(" ", "cleanups", _cleanup_times); 2929 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2930 _total_counting_time, 2931 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 2932 (double)_cleanup_times.num() 2933 : 0.0)); 2934 if (G1ScrubRemSets) { 2935 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2936 _total_rs_scrub_time, 2937 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 2938 (double)_cleanup_times.num() 2939 : 0.0)); 2940 } 2941 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 2942 (_init_times.sum() + _remark_times.sum() + 2943 _cleanup_times.sum())/1000.0); 2944 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 2945 "(%8.2f s marking).", 2946 cmThread()->vtime_accum(), 2947 cmThread()->vtime_mark_accum()); 2948 } 2949 2950 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2951 _parallel_workers->print_worker_threads_on(st); 2952 } 2953 2954 void ConcurrentMark::print_on_error(outputStream* st) const { 2955 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2956 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2957 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2958 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2959 } 2960 2961 // We take a break if someone is trying to stop the world. 2962 bool ConcurrentMark::do_yield_check(uint worker_id) { 2963 if (SuspendibleThreadSet::should_yield()) { 2964 if (worker_id == 0) { 2965 _g1h->g1_policy()->record_concurrent_pause(); 2966 } 2967 SuspendibleThreadSet::yield(); 2968 return true; 2969 } else { 2970 return false; 2971 } 2972 } 2973 2974 #ifndef PRODUCT 2975 // for debugging purposes 2976 void ConcurrentMark::print_finger() { 2977 gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT, 2978 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 2979 for (uint i = 0; i < _max_worker_id; ++i) { 2980 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 2981 } 2982 gclog_or_tty->cr(); 2983 } 2984 #endif 2985 2986 // Closure for iteration over bitmaps 2987 class CMBitMapClosure : public BitMapClosure { 2988 private: 2989 // the bitmap that is being iterated over 2990 CMBitMap* _nextMarkBitMap; 2991 ConcurrentMark* _cm; 2992 CMTask* _task; 2993 2994 public: 2995 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 2996 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2997 2998 bool do_bit(size_t offset) { 2999 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3000 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3001 assert( addr < _cm->finger(), "invariant"); 3002 3003 statsOnly( _task->increase_objs_found_on_bitmap() ); 3004 assert(addr >= _task->finger(), "invariant"); 3005 3006 // We move that task's local finger along. 
3007 _task->move_finger_to(addr); 3008 3009 _task->scan_object(oop(addr)); 3010 // we only partially drain the local queue and global stack 3011 _task->drain_local_queue(true); 3012 _task->drain_global_stack(true); 3013 3014 // if the has_aborted flag has been raised, we need to bail out of 3015 // the iteration 3016 return !_task->has_aborted(); 3017 } 3018 }; 3019 3020 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3021 ConcurrentMark* cm, 3022 CMTask* task) 3023 : _g1h(g1h), _cm(cm), _task(task) { 3024 assert(_ref_processor == NULL, "should be initialized to NULL"); 3025 3026 if (G1UseConcMarkReferenceProcessing) { 3027 _ref_processor = g1h->ref_processor_cm(); 3028 assert(_ref_processor != NULL, "should not be NULL"); 3029 } 3030 } 3031 3032 void CMTask::setup_for_region(HeapRegion* hr) { 3033 assert(hr != NULL, 3034 "claim_region() should have filtered out NULL regions"); 3035 assert(!hr->is_continues_humongous(), 3036 "claim_region() should have filtered out continues humongous regions"); 3037 3038 if (_cm->verbose_low()) { 3039 gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT, 3040 _worker_id, p2i(hr)); 3041 } 3042 3043 _curr_region = hr; 3044 _finger = hr->bottom(); 3045 update_region_limit(); 3046 } 3047 3048 void CMTask::update_region_limit() { 3049 HeapRegion* hr = _curr_region; 3050 HeapWord* bottom = hr->bottom(); 3051 HeapWord* limit = hr->next_top_at_mark_start(); 3052 3053 if (limit == bottom) { 3054 if (_cm->verbose_low()) { 3055 gclog_or_tty->print_cr("[%u] found an empty region " 3056 "[" PTR_FORMAT ", " PTR_FORMAT ")", 3057 _worker_id, p2i(bottom), p2i(limit)); 3058 } 3059 // The region was collected underneath our feet. 3060 // We set the finger to bottom to ensure that the bitmap 3061 // iteration that will follow this will not do anything. 3062 // (this is not a condition that holds when we set the region up, 3063 // as the region is not supposed to be empty in the first place) 3064 _finger = bottom; 3065 } else if (limit >= _region_limit) { 3066 assert(limit >= _finger, "peace of mind"); 3067 } else { 3068 assert(limit < _region_limit, "only way to get here"); 3069 // This can happen under some pretty unusual circumstances. An 3070 // evacuation pause empties the region underneath our feet (NTAMS 3071 // at bottom). We then do some allocation in the region (NTAMS 3072 // stays at bottom), followed by the region being used as a GC 3073 // alloc region (NTAMS will move to top() and the objects 3074 // originally below it will be grayed). All objects now marked in 3075 // the region are explicitly grayed, if below the global finger, 3076 // and we do not need in fact to scan anything else. So, we simply 3077 // set _finger to be limit to ensure that the bitmap iteration 3078 // doesn't do anything. 3079 _finger = limit; 3080 } 3081 3082 _region_limit = limit; 3083 } 3084 3085 void CMTask::giveup_current_region() { 3086 assert(_curr_region != NULL, "invariant"); 3087 if (_cm->verbose_low()) { 3088 gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT, 3089 _worker_id, p2i(_curr_region)); 3090 } 3091 clear_region_fields(); 3092 } 3093 3094 void CMTask::clear_region_fields() { 3095 // Values for these three fields that indicate that we're not 3096 // holding on to a region. 
3097 _curr_region = NULL; 3098 _finger = NULL; 3099 _region_limit = NULL; 3100 } 3101 3102 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3103 if (cm_oop_closure == NULL) { 3104 assert(_cm_oop_closure != NULL, "invariant"); 3105 } else { 3106 assert(_cm_oop_closure == NULL, "invariant"); 3107 } 3108 _cm_oop_closure = cm_oop_closure; 3109 } 3110 3111 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3112 guarantee(nextMarkBitMap != NULL, "invariant"); 3113 3114 if (_cm->verbose_low()) { 3115 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3116 } 3117 3118 _nextMarkBitMap = nextMarkBitMap; 3119 clear_region_fields(); 3120 3121 _calls = 0; 3122 _elapsed_time_ms = 0.0; 3123 _termination_time_ms = 0.0; 3124 _termination_start_time_ms = 0.0; 3125 3126 #if _MARKING_STATS_ 3127 _aborted = 0; 3128 _aborted_overflow = 0; 3129 _aborted_cm_aborted = 0; 3130 _aborted_yield = 0; 3131 _aborted_timed_out = 0; 3132 _aborted_satb = 0; 3133 _aborted_termination = 0; 3134 _steal_attempts = 0; 3135 _steals = 0; 3136 _local_pushes = 0; 3137 _local_pops = 0; 3138 _local_max_size = 0; 3139 _objs_scanned = 0; 3140 _global_pushes = 0; 3141 _global_pops = 0; 3142 _global_max_size = 0; 3143 _global_transfers_to = 0; 3144 _global_transfers_from = 0; 3145 _regions_claimed = 0; 3146 _objs_found_on_bitmap = 0; 3147 _satb_buffers_processed = 0; 3148 #endif // _MARKING_STATS_ 3149 } 3150 3151 bool CMTask::should_exit_termination() { 3152 regular_clock_call(); 3153 // This is called when we are in the termination protocol. We should 3154 // quit if, for some reason, this task wants to abort or the global 3155 // stack is not empty (this means that we can get work from it). 3156 return !_cm->mark_stack_empty() || has_aborted(); 3157 } 3158 3159 void CMTask::reached_limit() { 3160 assert(_words_scanned >= _words_scanned_limit || 3161 _refs_reached >= _refs_reached_limit , 3162 "shouldn't have been called otherwise"); 3163 regular_clock_call(); 3164 } 3165 3166 void CMTask::regular_clock_call() { 3167 if (has_aborted()) return; 3168 3169 // First, we need to recalculate the words scanned and refs reached 3170 // limits for the next clock call. 3171 recalculate_limits(); 3172 3173 // During the regular clock call we do the following 3174 3175 // (1) If an overflow has been flagged, then we abort. 3176 if (_cm->has_overflown()) { 3177 set_has_aborted(); 3178 return; 3179 } 3180 3181 // If we are not concurrent (i.e. we're doing remark) we don't need 3182 // to check anything else. The other steps are only needed during 3183 // the concurrent marking phase. 3184 if (!concurrent()) return; 3185 3186 // (2) If marking has been aborted for Full GC, then we also abort. 3187 if (_cm->has_aborted()) { 3188 set_has_aborted(); 3189 statsOnly( ++_aborted_cm_aborted ); 3190 return; 3191 } 3192 3193 double curr_time_ms = os::elapsedVTime() * 1000.0; 3194 3195 // (3) If marking stats are enabled, then we update the step history. 
3196 #if _MARKING_STATS_
3197   if (_words_scanned >= _words_scanned_limit) {
3198     ++_clock_due_to_scanning;
3199   }
3200   if (_refs_reached >= _refs_reached_limit) {
3201     ++_clock_due_to_marking;
3202   }
3203 
3204   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3205   _interval_start_time_ms = curr_time_ms;
3206   _all_clock_intervals_ms.add(last_interval_ms);
3207 
3208   if (_cm->verbose_medium()) {
3209     gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3210                            "scanned = " SIZE_FORMAT "%s, refs reached = " SIZE_FORMAT "%s",
3211                            _worker_id, last_interval_ms,
3212                            _words_scanned,
3213                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3214                            _refs_reached,
3215                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3216   }
3217 #endif // _MARKING_STATS_
3218 
3219   // (4) We check whether we should yield. If we have to, then we abort.
3220   if (SuspendibleThreadSet::should_yield()) {
3221     // We should yield. To do this we abort the task. The caller is
3222     // responsible for yielding.
3223     set_has_aborted();
3224     statsOnly( ++_aborted_yield );
3225     return;
3226   }
3227 
3228   // (5) We check whether we've reached our time quota. If we have,
3229   // then we abort.
3230   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3231   if (elapsed_time_ms > _time_target_ms) {
3232     set_has_aborted();
3233     _has_timed_out = true;
3234     statsOnly( ++_aborted_timed_out );
3235     return;
3236   }
3237 
3238   // (6) Finally, we check whether there are enough completed SATB
3239   // buffers available for processing. If there are, we abort.
3240   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3241   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3242     if (_cm->verbose_low()) {
3243       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3244                              _worker_id);
3245     }
3246     // We do need to process SATB buffers, so we'll abort and restart
3247     // the marking task to do so
3248     set_has_aborted();
3249     statsOnly( ++_aborted_satb );
3250     return;
3251   }
3252 }
3253 
3254 void CMTask::recalculate_limits() {
3255   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3256   _words_scanned_limit = _real_words_scanned_limit;
3257 
3258   _real_refs_reached_limit = _refs_reached + refs_reached_period;
3259   _refs_reached_limit = _real_refs_reached_limit;
3260 }
3261 
3262 void CMTask::decrease_limits() {
3263   // This is called when we believe that we're going to do an infrequent
3264   // operation which will increase the per-byte scanned cost (i.e. move
3265   // entries to/from the global stack). It basically tries to decrease the
3266   // scanning limit so that the clock is called earlier.
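  // For example (illustrative numbers only, not the actual tuning
  // values): if words_scanned_period were 12*1024 words, the limit below
  // would be pulled back by 9*1024 words, leaving only a quarter of the
  // usual scanning budget before regular_clock_call() fires again.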
3267 
3268   if (_cm->verbose_medium()) {
3269     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3270   }
3271 
3272   _words_scanned_limit = _real_words_scanned_limit -
3273     3 * words_scanned_period / 4;
3274   _refs_reached_limit = _real_refs_reached_limit -
3275     3 * refs_reached_period / 4;
3276 }
3277 
3278 void CMTask::move_entries_to_global_stack() {
3279   // local array where we'll store the entries that will be popped
3280   // from the local queue
3281   oop buffer[global_stack_transfer_size];
3282 
3283   int n = 0;
3284   oop obj;
3285   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3286     buffer[n] = obj;
3287     ++n;
3288   }
3289 
3290   if (n > 0) {
3291     // we popped at least one entry from the local queue
3292 
3293     statsOnly( ++_global_transfers_to; _local_pops += n );
3294 
3295     if (!_cm->mark_stack_push(buffer, n)) {
3296       if (_cm->verbose_low()) {
3297         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3298                                _worker_id);
3299       }
3300       set_has_aborted();
3301     } else {
3302       // the transfer was successful
3303 
3304       if (_cm->verbose_medium()) {
3305         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3306                                _worker_id, n);
3307       }
3308       statsOnly( size_t tmp_size = _cm->mark_stack_size();
3309                  if (tmp_size > _global_max_size) {
3310                    _global_max_size = tmp_size;
3311                  }
3312                  _global_pushes += n );
3313     }
3314   }
3315 
3316   // this operation was quite expensive, so decrease the limits
3317   decrease_limits();
3318 }
3319 
3320 void CMTask::get_entries_from_global_stack() {
3321   // local array where we'll store the entries that will be popped
3322   // from the global stack.
3323   oop buffer[global_stack_transfer_size];
3324   int n;
3325   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3326   assert(n <= global_stack_transfer_size,
3327          "we should not pop more than the given limit");
3328   if (n > 0) {
3329     // yes, we did actually pop at least one entry
3330 
3331     statsOnly( ++_global_transfers_from; _global_pops += n );
3332     if (_cm->verbose_medium()) {
3333       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3334                              _worker_id, n);
3335     }
3336     for (int i = 0; i < n; ++i) {
3337       bool success = _task_queue->push(buffer[i]);
3338       // We only call this when the local queue is empty or under a
3339       // given target limit. So, we do not expect this push to fail.
3340       assert(success, "invariant");
3341     }
3342 
3343     statsOnly( size_t tmp_size = (size_t)_task_queue->size();
3344                if (tmp_size > _local_max_size) {
3345                  _local_max_size = tmp_size;
3346                }
3347                _local_pushes += n );
3348   }
3349 
3350   // this operation was quite expensive, so decrease the limits
3351   decrease_limits();
3352 }
3353 
3354 void CMTask::drain_local_queue(bool partially) {
3355   if (has_aborted()) return;
3356 
3357   // Decide what the target size is, depending on whether we're going to
3358   // drain it partially (so that other tasks can steal if they run out
3359   // of things to do) or totally (at the very end).
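  // For example (hypothetical values): with max_elems() == 16384 and
  // GCDrainStackTargetSize == 64, the partial target is
  // MIN2(16384 / 3, 64) == 64, i.e. draining stops once at most 64
  // entries are left available for other tasks to steal.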
3360   size_t target_size;
3361   if (partially) {
3362     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3363   } else {
3364     target_size = 0;
3365   }
3366 
3367   if (_task_queue->size() > target_size) {
3368     if (_cm->verbose_high()) {
3369       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3370                              _worker_id, target_size);
3371     }
3372 
3373     oop obj;
3374     bool ret = _task_queue->pop_local(obj);
3375     while (ret) {
3376       statsOnly( ++_local_pops );
3377 
3378       if (_cm->verbose_high()) {
3379         gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
3380                                p2i((void*) obj));
3381       }
3382 
3383       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
3384       assert(!_g1h->is_on_master_free_list(
3385                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3386 
3387       scan_object(obj);
3388 
3389       if (_task_queue->size() <= target_size || has_aborted()) {
3390         ret = false;
3391       } else {
3392         ret = _task_queue->pop_local(obj);
3393       }
3394     }
3395 
3396     if (_cm->verbose_high()) {
3397       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3398                              _worker_id, _task_queue->size());
3399     }
3400   }
3401 }
3402 
3403 void CMTask::drain_global_stack(bool partially) {
3404   if (has_aborted()) return;
3405 
3406   // We have a policy to drain the local queue before we attempt to
3407   // drain the global stack.
3408   assert(partially || _task_queue->size() == 0, "invariant");
3409 
3410   // Decide what the target size is, depending on whether we're going to
3411   // drain it partially (so that other tasks can steal if they run out
3412   // of things to do) or totally (at the very end). Notice that,
3413   // because we move entries from the global stack in chunks, or
3414   // because another task might be doing the same, we might in fact
3415   // drop below the target. But this is not a problem.
3416   size_t target_size;
3417   if (partially) {
3418     target_size = _cm->partial_mark_stack_size_target();
3419   } else {
3420     target_size = 0;
3421   }
3422 
3423   if (_cm->mark_stack_size() > target_size) {
3424     if (_cm->verbose_low()) {
3425       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3426                              _worker_id, target_size);
3427     }
3428 
3429     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3430       get_entries_from_global_stack();
3431       drain_local_queue(partially);
3432     }
3433 
3434     if (_cm->verbose_low()) {
3435       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3436                              _worker_id, _cm->mark_stack_size());
3437     }
3438   }
3439 }
3440 
3441 // The SATB queue makes several assumptions about whether to call the par
3442 // or non-par versions of the methods. This is why some of the code is
3443 // replicated. We should really get rid of the single-threaded version
3444 // of the code to simplify things.
3445 void CMTask::drain_satb_buffers() {
3446   if (has_aborted()) return;
3447 
3448   // We set this so that the regular clock knows that we're in the
3449   // middle of draining buffers and doesn't set the abort flag when it
3450   // notices that SATB buffers are available for draining. It'd be
3451   // very counterproductive if it did that. :-)
3452   _draining_satb_buffers = true;
3453 
3454   CMSATBBufferClosure satb_cl(this, _g1h);
3455   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3456 
3457   // This keeps claiming and applying the closure to completed buffers
3458   // until we run out of buffers or we need to abort.
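  // apply_closure_to_completed_buffer() returns true only if it claimed a
  // completed buffer and applied the closure to it, so the loop ends as
  // soon as the shared list of completed buffers is exhausted. Calling
  // regular_clock_call() after every buffer keeps a task that is swamped
  // with SATB work honest about its time target.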
3459   while (!has_aborted() &&
3460          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3461     if (_cm->verbose_medium()) {
3462       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3463     }
3464     statsOnly( ++_satb_buffers_processed );
3465     regular_clock_call();
3466   }
3467 
3468   _draining_satb_buffers = false;
3469 
3470   assert(has_aborted() ||
3471          concurrent() ||
3472          satb_mq_set.completed_buffers_num() == 0, "invariant");
3473 
3474   // Again, this was a potentially expensive operation, so decrease the
3475   // limits to get the regular clock call earlier
3476   decrease_limits();
3477 }
3478 
3479 void CMTask::print_stats() {
3480   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3481                          _worker_id, _calls);
3482   gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3483                          _elapsed_time_ms, _termination_time_ms);
3484   gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3485                          _step_times_ms.num(), _step_times_ms.avg(),
3486                          _step_times_ms.sd());
3487   gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3488                          _step_times_ms.maximum(), _step_times_ms.sum());
3489 
3490 #if _MARKING_STATS_
3491   gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3492                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3493                          _all_clock_intervals_ms.sd());
3494   gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3495                          _all_clock_intervals_ms.maximum(),
3496                          _all_clock_intervals_ms.sum());
3497   gclog_or_tty->print_cr(" Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
3498                          _clock_due_to_scanning, _clock_due_to_marking);
3499   gclog_or_tty->print_cr(" Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
3500                          _objs_scanned, _objs_found_on_bitmap);
3501   gclog_or_tty->print_cr(" Local Queue: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3502                          _local_pushes, _local_pops, _local_max_size);
3503   gclog_or_tty->print_cr(" Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3504                          _global_pushes, _global_pops, _global_max_size);
3505   gclog_or_tty->print_cr(" transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
3506                          _global_transfers_to, _global_transfers_from);
3507   gclog_or_tty->print_cr(" Regions: claimed = " SIZE_FORMAT, _regions_claimed);
3508   gclog_or_tty->print_cr(" SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
3509   gclog_or_tty->print_cr(" Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
3510                          _steal_attempts, _steals);
3511   gclog_or_tty->print_cr(" Aborted: " SIZE_FORMAT ", due to", _aborted);
3512   gclog_or_tty->print_cr(" overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
3513                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3514   gclog_or_tty->print_cr(" time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
3515                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3516 #endif // _MARKING_STATS_
3517 }
3518 
3519 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3520   return _task_queues->steal(worker_id, hash_seed, obj);
3521 }
3522 
3523 /*****************************************************************************
3524 
3525     The do_marking_step(time_target_ms, ...) method is the building
3526     block of the parallel marking framework. It can be called in parallel
3527     with other invocations of do_marking_step() on different tasks
3528     (but only one per task, obviously) and concurrently with the
3529     mutator threads, or during remark, hence it eliminates the need
3530     for two versions of the code. When called during remark, it will
3531     pick up from where the task left off during the concurrent marking
3532     phase. Interestingly, tasks are also claimable during evacuation
3533     pauses, since do_marking_step() ensures that it aborts before
3534     it needs to yield.
3535 
3536     The data structures that it uses to do marking work are the
3537     following:
3538 
3539       (1) Marking Bitmap. If there are gray objects that appear only
3540       on the bitmap (this happens either when dealing with an overflow
3541       or when the initial marking phase has simply marked the roots
3542       and didn't push them on the stack), then tasks claim heap
3543       regions whose bitmap they then scan to find gray objects. A
3544       global finger indicates where the end of the last claimed region
3545       is. A local finger indicates how far into the region a task has
3546       scanned. The two fingers are used to determine how to gray an
3547       object (i.e. whether simply marking it is OK, as it will be
3548       visited by a task in the future, or whether it also needs to be
3549       pushed on a stack).
3550 
3551       (2) Local Queue. The local queue of the task which is accessed
3552       reasonably efficiently by the task. Other tasks can steal from
3553       it when they run out of work. Throughout the marking phase, a
3554       task attempts to keep its local queue short but not totally
3555       empty, so that entries are available for stealing by other
3556       tasks. Only when there is no more work will a task totally
3557       drain its local queue.
3558 
3559       (3) Global Mark Stack. This handles local queue overflow. During
3560       marking only sets of entries are moved between it and the local
3561       queues, as access to it requires a mutex, and finer-grained
3562       interaction with it might cause contention. If it
3563       overflows, then the marking phase should restart and iterate
3564       over the bitmap to identify gray objects. Throughout the marking
3565       phase, tasks attempt to keep the global mark stack at a small
3566       length but not totally empty, so that entries are available for
3567       popping by other tasks. Only when there is no more work will
3568       tasks totally drain the global mark stack.
3569 
3570       (4) SATB Buffer Queue. This is where completed SATB buffers are
3571       made available. Buffers are regularly removed from this queue
3572       and scanned for roots, so that the queue doesn't get too
3573       long. During remark, all completed buffers are processed, as
3574       well as the filled-in parts of any uncompleted buffers.
3575 
3576     The do_marking_step() method tries to abort when the time target
3577     has been reached. There are a few other cases when the
3578     do_marking_step() method also aborts:
3579 
3580       (1) When the marking phase has been aborted (after a Full GC).
3581 
3582       (2) When a global overflow (on the global stack) has been
3583       triggered. Before the task aborts, it will actually sync up with
3584       the other tasks to ensure that all the marking data structures
3585       (local queues, stacks, fingers etc.) are re-initialized so that
3586       when do_marking_step() completes, the marking phase can
3587       immediately restart.
3588 
3589       (3) When enough completed SATB buffers are available. The
3590       do_marking_step() method only tries to drain SATB buffers right
3591       at the beginning. So, if enough buffers are available, the
3592       marking step aborts and the SATB buffers are processed at
3593       the beginning of the next invocation.
3594 
3595       (4) To yield. When we have to yield, we abort and do the yield
3596       right at the end of do_marking_step(). This saves us from a lot
3597       of hassle, as by yielding we might allow a Full GC. If this
3598       happens then objects will be compacted underneath our feet, the
3599       heap might shrink, etc. We avoid checking for this by just
3600       aborting and doing the yield right at the end.
3601 
3602     From the above it follows that the do_marking_step() method should
3603     be called in a loop (or, otherwise, regularly) until it completes.
3604 
3605     If a marking step completes without its has_aborted() flag being
3606     true, it means it has completed the current marking phase (and
3607     also all other marking tasks have done so and have all synced up).
3608 
3609     A method called regular_clock_call() is invoked "regularly" (in
3610     sub-ms intervals) throughout marking. It is this clock method that
3611     checks all the abort conditions which were mentioned above and
3612     decides when the task should abort. A work-based scheme is used to
3613     trigger this clock method: when the number of object words the
3614     marking phase has scanned or the number of references the marking
3615     phase has visited reaches a given limit. Additional invocations of
3616     the clock method have been planted in a few other strategic places
3617     too. The initial reason for the clock method was to avoid calling
3618     vtime too frequently, as it is quite expensive. So, once it was in
3619     place, it was natural to piggy-back all the other conditions on it
3620     too and not constantly check them throughout the code.
3621 
3622     If do_termination is true then do_marking_step will enter its
3623     termination protocol.
3624 
3625     The value of is_serial must be true when do_marking_step is being
3626     called serially (i.e. by the VMThread) and do_marking_step should
3627     skip any synchronization in the termination and overflow code.
3628     Examples include the serial remark code and the serial reference
3629     processing closures.
3630 
3631     The value of is_serial must be false when do_marking_step is
3632     being called by any of the worker threads in a work gang.
3633     Examples include the concurrent marking code (CMMarkingTask),
3634     the MT remark code, and the MT reference processing closures.
3635 
3636  *****************************************************************************/
3637 
3638 void CMTask::do_marking_step(double time_target_ms,
3639                              bool do_termination,
3640                              bool is_serial) {
3641   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3642   assert(concurrent() == _cm->concurrent(), "they should be the same");
3643 
3644   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3645   assert(_task_queues != NULL, "invariant");
3646   assert(_task_queue != NULL, "invariant");
3647   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3648 
3649   assert(!_claimed,
3650          "only one thread should claim this task at any one time");
3651 
3652   // OK, this doesn't safeguard against all possible scenarios, as it is
3653   // possible for two threads to set the _claimed flag at the same
3654   // time. But it is only for debugging purposes anyway and it will
3655   // catch most problems.
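  // A minimal sketch of the calling loop described in the comment block
  // above (illustrative only; the real drivers live in the concurrent
  // marking and remark code and also yield between steps):
  //
  //   do {
  //     task->do_marking_step(G1ConcMarkStepDurationMillis,
  //                           true  /* do_termination */,
  //                           false /* is_serial */);
  //     // on abort: yield to the suspendible thread set and retry,
  //     // unless marking itself has been aborted for a Full GC
  //   } while (task->has_aborted());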
3656   _claimed = true;
3657 
3658   _start_time_ms = os::elapsedVTime() * 1000.0;
3659   statsOnly( _interval_start_time_ms = _start_time_ms );
3660 
3661   // If do_stealing is true then do_marking_step will attempt to
3662   // steal work from the other CMTasks. It only makes sense to
3663   // enable stealing when the termination protocol is enabled
3664   // and do_marking_step() is not being called serially.
3665   bool do_stealing = do_termination && !is_serial;
3666 
3667   double diff_prediction_ms =
3668     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3669   _time_target_ms = time_target_ms - diff_prediction_ms;
3670 
3671   // set up the variables that are used in the work-based scheme to
3672   // call the regular clock method
3673   _words_scanned = 0;
3674   _refs_reached = 0;
3675   recalculate_limits();
3676 
3677   // clear all flags
3678   clear_has_aborted();
3679   _has_timed_out = false;
3680   _draining_satb_buffers = false;
3681 
3682   ++_calls;
3683 
3684   if (_cm->verbose_low()) {
3685     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3686                            "target = %1.2lfms >>>>>>>>>>",
3687                            _worker_id, _calls, _time_target_ms);
3688   }
3689 
3690   // Set up the bitmap and oop closures. Anything that uses them is
3691   // eventually called from this method, so it is OK to allocate them
3692   // on this method's stack frame.
3693   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3694   G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
3695   set_cm_oop_closure(&cm_oop_closure);
3696 
3697   if (_cm->has_overflown()) {
3698     // This can happen if the mark stack overflows during a GC pause
3699     // and this task, after a yield point, restarts. We have to abort
3700     // as we need to get into the overflow protocol which happens
3701     // right at the end of this task.
3702     set_has_aborted();
3703   }
3704 
3705   // First drain any available SATB buffers. After this, we will not
3706   // look at SATB buffers before the next invocation of this method.
3707   // If enough completed SATB buffers are queued up, the regular clock
3708   // will abort this task so that it restarts.
3709   drain_satb_buffers();
3710   // ...then partially drain the local queue and the global stack
3711   drain_local_queue(true);
3712   drain_global_stack(true);
3713 
3714   do {
3715     if (!has_aborted() && _curr_region != NULL) {
3716       // This means that we're already holding on to a region.
3717       assert(_finger != NULL, "if region is not NULL, then the finger "
3718              "should not be NULL either");
3719 
3720       // We might have restarted this task after an evacuation pause
3721       // which might have evacuated the region we're holding on to
3722       // underneath our feet. Let's read its limit again to make sure
3723       // that we do not iterate over a region of the heap that
3724       // contains garbage (update_region_limit() will also move
3725       // _finger to the start of the region if it is found empty).
3726       update_region_limit();
3727       // We will start from _finger, not from the start of the region,
3728       // as we might be restarting this task after aborting half-way
3729       // through scanning this region. In this case, _finger points to
3730       // the address where we last found a marked object. If this is a
3731       // fresh region, _finger points to start().
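      //
      //   bottom()      _finger          _region_limit (== NTAMS)    end()
      //      |-(scanned)--|======== mr ========|........................|
      //
      // i.e. mr = [_finger, _region_limit) is exactly the part of the
      // region this step still has to look at.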
3732       MemRegion mr = MemRegion(_finger, _region_limit);
3733 
3734       if (_cm->verbose_low()) {
3735         gclog_or_tty->print_cr("[%u] we're scanning part "
3736                                "[" PTR_FORMAT ", " PTR_FORMAT ") "
3737                                "of region " HR_FORMAT,
3738                                _worker_id, p2i(_finger), p2i(_region_limit),
3739                                HR_FORMAT_PARAMS(_curr_region));
3740       }
3741 
3742       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3743              "humongous regions should go around loop once only");
3744 
3745       // Some special cases:
3746       // If the memory region is empty, we can just give up the region.
3747       // If the current region is humongous then we only need to check
3748       // the bitmap for the bit associated with the start of the object,
3749       // scan the object if it's live, and give up the region.
3750       // Otherwise, let's iterate over the bitmap of the part of the region
3751       // that is left.
3752       // If the iteration is successful, give up the region.
3753       if (mr.is_empty()) {
3754         giveup_current_region();
3755         regular_clock_call();
3756       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3757         if (_nextMarkBitMap->isMarked(mr.start())) {
3758           // The object is marked - apply the closure
3759           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3760           bitmap_closure.do_bit(offset);
3761         }
3762         // Even if this task aborted while scanning the humongous object
3763         // we can (and should) give up the current region.
3764         giveup_current_region();
3765         regular_clock_call();
3766       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3767         giveup_current_region();
3768         regular_clock_call();
3769       } else {
3770         assert(has_aborted(), "currently the only way to do so");
3771         // The only way to abort the bitmap iteration is to return
3772         // false from the do_bit() method. However, inside the
3773         // do_bit() method we move the _finger to point to the
3774         // object currently being looked at. So, if we bail out, we
3775         // have definitely set _finger to something non-null.
3776         assert(_finger != NULL, "invariant");
3777 
3778         // Region iteration was actually aborted. So now _finger
3779         // points to the address of the object we last scanned. If we
3780         // leave it there, when we restart this task, we will rescan
3781         // the object. It is easy to avoid this. We move the finger by
3782         // enough to point to the next possible object header (the
3783         // bitmap knows by how much we need to move it as it knows its
3784         // granularity).
3785         assert(_finger < _region_limit, "invariant");
3786         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3787         // Check if bitmap iteration was aborted while scanning the last object
3788         if (new_finger >= _region_limit) {
3789           giveup_current_region();
3790         } else {
3791           move_finger_to(new_finger);
3792         }
3793       }
3794     }
3795     // At this point we have either completed iterating over the
3796     // region we were holding on to, or we have aborted.
3797 
3798     // We then partially drain the local queue and the global stack.
3799     // (Do we really need this?)
3800     drain_local_queue(true);
3801     drain_global_stack(true);
3802 
3803     // Read the note on the claim_region() method on why it might
3804     // return NULL with potentially more regions available for
3805     // claiming and why we have to check out_of_regions() to determine
3806     // whether we're done or not.
3807     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3808       // We are going to try to claim a new region. We should have
3809       // given up on the previous one.
3810       // Separated the asserts so that we know which one fires.
3811       assert(_curr_region == NULL, "invariant");
3812       assert(_finger == NULL, "invariant");
3813       assert(_region_limit == NULL, "invariant");
3814       if (_cm->verbose_low()) {
3815         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
3816       }
3817       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3818       if (claimed_region != NULL) {
3819         // Yes, we managed to claim one
3820         statsOnly( ++_regions_claimed );
3821 
3822         if (_cm->verbose_low()) {
3823           gclog_or_tty->print_cr("[%u] we successfully claimed "
3824                                  "region " PTR_FORMAT,
3825                                  _worker_id, p2i(claimed_region));
3826         }
3827 
3828         setup_for_region(claimed_region);
3829         assert(_curr_region == claimed_region, "invariant");
3830       }
3831       // It is important to call the regular clock here. It might take
3832       // a while to claim a region if, for example, we hit a large
3833       // block of empty regions. So we need to call the regular clock
3834       // method once round the loop to make sure it's called
3835       // frequently enough.
3836       regular_clock_call();
3837     }
3838 
3839     if (!has_aborted() && _curr_region == NULL) {
3840       assert(_cm->out_of_regions(),
3841              "at this point we should be out of regions");
3842     }
3843   } while (_curr_region != NULL && !has_aborted());
3844 
3845   if (!has_aborted()) {
3846     // We cannot check whether the global stack is empty, since other
3847     // tasks might be pushing objects to it concurrently.
3848     assert(_cm->out_of_regions(),
3849            "at this point we should be out of regions");
3850 
3851     if (_cm->verbose_low()) {
3852       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
3853     }
3854 
3855     // Try to reduce the number of available SATB buffers so that
3856     // remark has less work to do.
3857     drain_satb_buffers();
3858   }
3859 
3860   // Since we've done everything else, we can now totally drain the
3861   // local queue and global stack.
3862   drain_local_queue(false);
3863   drain_global_stack(false);
3864 
3865   // Attempt at work stealing from other tasks' queues.
3866   if (do_stealing && !has_aborted()) {
3867     // We have not aborted. This means that we have finished all that
3868     // we could. Let's try to do some stealing...
3869 
3870     // We cannot check whether the global stack is empty, since other
3871     // tasks might be pushing objects to it concurrently.
3872     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3873            "only way to reach here");
3874 
3875     if (_cm->verbose_low()) {
3876       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
3877     }
3878 
3879     while (!has_aborted()) {
3880       oop obj;
3881       statsOnly( ++_steal_attempts );
3882 
3883       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3884         if (_cm->verbose_medium()) {
3885           gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
3886                                  _worker_id, p2i((void*) obj));
3887         }
3888 
3889         statsOnly( ++_steals );
3890 
3891         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3892                "any stolen object should be marked");
3893         scan_object(obj);
3894 
3895         // And since we're towards the end, let's totally drain the
3896         // local queue and global stack.
3897         drain_local_queue(false);
3898         drain_global_stack(false);
3899       } else {
3900         break;
3901       }
3902     }
3903   }
3904 
3905   // If we are about to wrap up and go into termination, check if we
3906   // should raise the overflow flag.
3907   if (do_termination && !has_aborted()) {
3908     if (_cm->force_overflow()->should_force()) {
3909       _cm->set_has_overflown();
3910       regular_clock_call();
3911     }
3912   }
3913 
3914   // We still haven't aborted. Now, let's try to get into the
3915   // termination protocol.
3916   if (do_termination && !has_aborted()) {
3917     // We cannot check whether the global stack is empty, since other
3918     // tasks might be concurrently pushing objects on it.
3919     // Separated the asserts so that we know which one fires.
3920     assert(_cm->out_of_regions(), "only way to reach here");
3921     assert(_task_queue->size() == 0, "only way to reach here");
3922 
3923     if (_cm->verbose_low()) {
3924       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
3925     }
3926 
3927     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3928 
3929     // The CMTask class also extends the TerminatorTerminator class,
3930     // hence its should_exit_termination() method will also decide
3931     // whether to exit the termination protocol or not.
3932     bool finished = (is_serial ||
3933                      _cm->terminator()->offer_termination(this));
3934     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3935     _termination_time_ms +=
3936       termination_end_time_ms - _termination_start_time_ms;
3937 
3938     if (finished) {
3939       // We're all done.
3940 
3941       if (_worker_id == 0) {
3942         // let's allow task 0 to do this
3943         if (concurrent()) {
3944           assert(_cm->concurrent_marking_in_progress(), "invariant");
3945           // we need to set this to false before the next
3946           // safepoint. This way we ensure that the marking phase
3947           // doesn't observe any more heap expansions.
3948           _cm->clear_concurrent_marking_in_progress();
3949         }
3950       }
3951 
3952       // We can now guarantee that the global stack is empty, since
3953       // all other tasks have finished. We separated the guarantees so
3954       // that, if a condition is false, we can immediately find out
3955       // which one.
3956       guarantee(_cm->out_of_regions(), "only way to reach here");
3957       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3958       guarantee(_task_queue->size() == 0, "only way to reach here");
3959       guarantee(!_cm->has_overflown(), "only way to reach here");
3960       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3961 
3962       if (_cm->verbose_low()) {
3963         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
3964       }
3965     } else {
3966       // Apparently there's more work to do. Let's abort this task. Its
3967       // caller will restart it and we can hopefully find more things to do.
3968 
3969       if (_cm->verbose_low()) {
3970         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
3971                                _worker_id);
3972       }
3973 
3974       set_has_aborted();
3975       statsOnly( ++_aborted_termination );
3976     }
3977   }
3978 
3979   // Mainly for debugging purposes, to make sure that a pointer to the
3980   // closure which was allocated on this frame doesn't escape it by
3981   // accident.
3982   set_cm_oop_closure(NULL);
3983   double end_time_ms = os::elapsedVTime() * 1000.0;
3984   double elapsed_time_ms = end_time_ms - _start_time_ms;
3985   // Update the step history.
3986   _step_times_ms.add(elapsed_time_ms);
3987 
3988   if (has_aborted()) {
3989     // The task was aborted for some reason.
3990 
3991     statsOnly( ++_aborted );
3992 
3993     if (_has_timed_out) {
3994       double diff_ms = elapsed_time_ms - _time_target_ms;
3995       // Keep statistics of how well we did with respect to hitting
3996       // our target only if we actually timed out (if we aborted for
3997       // other reasons, then the results might get skewed).
3998       _marking_step_diffs_ms.add(diff_ms);
3999     }
4000 
4001     if (_cm->has_overflown()) {
4002       // This is the interesting one. We aborted because a global
4003       // overflow was raised. This means we have to restart the
4004       // marking phase and start iterating over regions. However, in
4005       // order to do this we have to make sure that all tasks stop
4006       // what they are doing and re-initialize in a safe manner. We
4007       // will achieve this with the use of two barrier sync points.
4008 
4009       if (_cm->verbose_low()) {
4010         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4011       }
4012 
4013       if (!is_serial) {
4014         // We only need to enter the sync barrier if being called
4015         // from a parallel context
4016         _cm->enter_first_sync_barrier(_worker_id);
4017 
4018         // When we exit this sync barrier we know that all tasks have
4019         // stopped doing marking work. So, it's now safe to
4020         // re-initialize our data structures. At the end of this method,
4021         // task 0 will clear the global data structures.
4022       }
4023 
4024       statsOnly( ++_aborted_overflow );
4025 
4026       // We clear the local state of this task...
4027       clear_region_fields();
4028 
4029       if (!is_serial) {
4030         // ...and enter the second barrier.
4031         _cm->enter_second_sync_barrier(_worker_id);
4032       }
4033       // At this point, if we're in the concurrent phase of
4034       // marking, everything has been re-initialized and we're
4035       // ready to restart.
4036     }
4037 
4038     if (_cm->verbose_low()) {
4039       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4040                              "elapsed = %1.2lfms <<<<<<<<<<",
4041                              _worker_id, _time_target_ms, elapsed_time_ms);
4042       if (_cm->has_aborted()) {
4043         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4044                                _worker_id);
4045       }
4046     }
4047   } else {
4048     if (_cm->verbose_low()) {
4049       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4050                              "elapsed = %1.2lfms <<<<<<<<<<",
4051                              _worker_id, _time_target_ms, elapsed_time_ms);
4052     }
4053   }
4054 
4055   _claimed = false;
4056 }
4057 
4058 CMTask::CMTask(uint worker_id,
4059                ConcurrentMark* cm,
4060                size_t* marked_bytes,
4061                BitMap* card_bm,
4062                CMTaskQueue* task_queue,
4063                CMTaskQueueSet* task_queues)
4064   : _g1h(G1CollectedHeap::heap()),
4065     _worker_id(worker_id), _cm(cm),
4066     _claimed(false),
4067     _nextMarkBitMap(NULL), _hash_seed(17),
4068     _task_queue(task_queue),
4069     _task_queues(task_queues),
4070     _cm_oop_closure(NULL),
4071     _marked_bytes_array(marked_bytes),
4072     _card_bm(card_bm) {
4073   guarantee(task_queue != NULL, "invariant");
4074   guarantee(task_queues != NULL, "invariant");
4075 
4076   statsOnly( _clock_due_to_scanning = 0;
4077              _clock_due_to_marking = 0 );
4078 
4079   _marking_step_diffs_ms.add(0.5);
4080 }
4081 
4082 // These are formatting macros that are used below to ensure
4083 // consistent formatting. The *_H_* versions are used to format the
4084 // header for a particular value and they should be kept consistent
4085 // with the corresponding macro. Also note that most of the macros add
4086 // the necessary white space (as a prefix) which makes them a bit
4087 // easier to compose.
4088 
4089 // All the output lines are prefixed with this string to be able to
4090 // identify them easily in a large log file.
4091 #define G1PPRL_LINE_PREFIX "###"
4092 
4093 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
4094 #ifdef _LP64
4095 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4096 #else // _LP64
4097 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4098 #endif // _LP64
4099 
4100 // For per-region info
4101 #define G1PPRL_TYPE_FORMAT " %-4s"
4102 #define G1PPRL_TYPE_H_FORMAT " %4s"
4103 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9)
4104 #define G1PPRL_BYTE_H_FORMAT " %9s"
4105 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4106 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4107 
4108 // For summary info
4109 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
4110 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT
4111 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
4112 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
4113 
4114 G1PrintRegionLivenessInfoClosure::
4115 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4116   : _out(out),
4117     _total_used_bytes(0), _total_capacity_bytes(0),
4118     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4119     _hum_used_bytes(0), _hum_capacity_bytes(0),
4120     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4121     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4122   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4123   MemRegion g1_reserved = g1h->g1_reserved();
4124   double now = os::elapsedTime();
4125 
4126   // Print the header of the output.
4127   _out->cr();
4128   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4129   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4130                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4131                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4132                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4133                  HeapRegion::GrainBytes);
4134   _out->print_cr(G1PPRL_LINE_PREFIX);
4135   _out->print_cr(G1PPRL_LINE_PREFIX
4136                  G1PPRL_TYPE_H_FORMAT
4137                  G1PPRL_ADDR_BASE_H_FORMAT
4138                  G1PPRL_BYTE_H_FORMAT
4139                  G1PPRL_BYTE_H_FORMAT
4140                  G1PPRL_BYTE_H_FORMAT
4141                  G1PPRL_DOUBLE_H_FORMAT
4142                  G1PPRL_BYTE_H_FORMAT
4143                  G1PPRL_BYTE_H_FORMAT,
4144                  "type", "address-range",
4145                  "used", "prev-live", "next-live", "gc-eff",
4146                  "remset", "code-roots");
4147   _out->print_cr(G1PPRL_LINE_PREFIX
4148                  G1PPRL_TYPE_H_FORMAT
4149                  G1PPRL_ADDR_BASE_H_FORMAT
4150                  G1PPRL_BYTE_H_FORMAT
4151                  G1PPRL_BYTE_H_FORMAT
4152                  G1PPRL_BYTE_H_FORMAT
4153                  G1PPRL_DOUBLE_H_FORMAT
4154                  G1PPRL_BYTE_H_FORMAT
4155                  G1PPRL_BYTE_H_FORMAT,
4156                  "", "",
4157                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4158                  "(bytes)", "(bytes)");
4159 }
4160 
4161 // It takes as a parameter a reference to one of the _hum_* fields; it
4162 // deduces the corresponding value for a region in a humongous region
4163 // series (either the region size, or what's left if the _hum_* field
4164 // is < the region size), and updates the _hum_* field accordingly.
4165 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4166   size_t bytes = 0;
4167   // The > 0 check is to deal with the prev and next live bytes which
4168   // could be 0.
4169   if (*hum_bytes > 0) {
4170     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4171     *hum_bytes -= bytes;
4172   }
4173   return bytes;
4174 }
4175 
4176 // It deduces the values for a region in a humongous region series
4177 // from the _hum_* fields and updates those accordingly. It assumes
4178 // that the _hum_* fields have already been set up from the "starts
4179 // humongous" region and that we visit the regions in address order.
4180 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4181                                                      size_t* capacity_bytes,
4182                                                      size_t* prev_live_bytes,
4183                                                      size_t* next_live_bytes) {
4184   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4185   *used_bytes = get_hum_bytes(&_hum_used_bytes);
4186   *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4187   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4188   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4189 }
4190 
4191 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4192   const char* type = r->get_type_str();
4193   HeapWord* bottom = r->bottom();
4194   HeapWord* end = r->end();
4195   size_t capacity_bytes = r->capacity();
4196   size_t used_bytes = r->used();
4197   size_t prev_live_bytes = r->live_bytes();
4198   size_t next_live_bytes = r->next_live_bytes();
4199   double gc_eff = r->gc_efficiency();
4200   size_t remset_bytes = r->rem_set()->mem_size();
4201   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4202 
4203   if (r->is_starts_humongous()) {
4204     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4205            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4206            "they should have been zeroed after the last time we used them");
4207     // Set up the _hum_* fields.
4208     _hum_capacity_bytes = capacity_bytes;
4209     _hum_used_bytes = used_bytes;
4210     _hum_prev_live_bytes = prev_live_bytes;
4211     _hum_next_live_bytes = next_live_bytes;
4212     get_hum_bytes(&used_bytes, &capacity_bytes,
4213                   &prev_live_bytes, &next_live_bytes);
4214     end = bottom + HeapRegion::GrainWords;
4215   } else if (r->is_continues_humongous()) {
4216     get_hum_bytes(&used_bytes, &capacity_bytes,
4217                   &prev_live_bytes, &next_live_bytes);
4218     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4219   }
4220 
4221   _total_used_bytes += used_bytes;
4222   _total_capacity_bytes += capacity_bytes;
4223   _total_prev_live_bytes += prev_live_bytes;
4224   _total_next_live_bytes += next_live_bytes;
4225   _total_remset_bytes += remset_bytes;
4226   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4227 
4228   // Print a line for this particular region.
4229   _out->print_cr(G1PPRL_LINE_PREFIX
4230                  G1PPRL_TYPE_FORMAT
4231                  G1PPRL_ADDR_BASE_FORMAT
4232                  G1PPRL_BYTE_FORMAT
4233                  G1PPRL_BYTE_FORMAT
4234                  G1PPRL_BYTE_FORMAT
4235                  G1PPRL_DOUBLE_FORMAT
4236                  G1PPRL_BYTE_FORMAT
4237                  G1PPRL_BYTE_FORMAT,
4238                  type, p2i(bottom), p2i(end),
4239                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4240                  remset_bytes, strong_code_roots_bytes);
4241 
4242   return false;
4243 }
4244 
4245 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4246   // Add the static memory usage to the remembered set sizes.
4247   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4248   // Print the footer of the output.
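  // With the formats above, the footer comes out roughly as (one line;
  // values invented for illustration):
  //   ### SUMMARY capacity: 2048.00 MB used: 1024.00 MB / 50.00 %
  //       prev-live: 900.00 MB / 43.95 % next-live: 880.00 MB / 42.97 %
  //       remset: 12.00 MB code-roots: 0.50 MB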
4249   _out->print_cr(G1PPRL_LINE_PREFIX);
4250   _out->print_cr(G1PPRL_LINE_PREFIX
4251                  " SUMMARY"
4252                  G1PPRL_SUM_MB_FORMAT("capacity")
4253                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4254                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4255                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4256                  G1PPRL_SUM_MB_FORMAT("remset")
4257                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4258                  bytes_to_mb(_total_capacity_bytes),
4259                  bytes_to_mb(_total_used_bytes),
4260                  perc(_total_used_bytes, _total_capacity_bytes),
4261                  bytes_to_mb(_total_prev_live_bytes),
4262                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4263                  bytes_to_mb(_total_next_live_bytes),
4264                  perc(_total_next_live_bytes, _total_capacity_bytes),
4265                  bytes_to_mb(_total_remset_bytes),
4266                  bytes_to_mb(_total_strong_code_roots_bytes));
4267   _out->cr();
4268 }
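
// Illustrative usage sketch (hypothetical call site; the real drivers live
// with the marking and cleanup code): printing the liveness table amounts
// to constructing the closure and iterating it over all regions, e.g.
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the header, doHeapRegion() prints one line per
// region, and the destructor prints the summary footer.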