/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}
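
// Sizing sketch (assuming the default 8-byte minimum object alignment and a
// hypothetical heap size): mark_distance() is 8 * 8 = 64, i.e. one bitmap bit
// covers 8 bytes of heap and one bitmap byte covers 64 bytes of heap, so
// compute_size() returns roughly heap_size / 64. A 1 GB heap would therefore
// need about 16 MB for each of the two mark bitmaps.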

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer     _hrclaimer;
  bool                  _suspendible; // If the task is suspendible, workers must join the STS.

 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      _cl(cl), _suspendible(suspendible), AbstractGangTask("Parallel Clear Bitmap Task"), _hrclaimer(n_workers) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    // Failed to double capacity, continue;
    log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
                  _capacity / K, new_capacity / K);
  }
}
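
// Illustrative expansion sequence (hypothetical values): starting from a
// capacity of 4M entries with MarkStackSizeMax at 16M entries, successive
// overflows would grow the stack to 8M and then 16M entries; once at the
// maximum, expand() only logs the benign failure above and keeps the
// current stack.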

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}
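
// Worked example for scale_parallel_threads() (hypothetical flag values):
// it returns (n + 2) / 4 with a floor of 1, so ParallelGCThreads values of
// 4, 8 and 23 would yield 1, 2 and 6 concurrent marking threads respectively.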

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
                                          (double) os::processor_count();
    double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor             = sleep_factor;
    _marking_task_overhead    = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  }
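
  // Worked example for the G1MarkingOverheadPercent branch above (all values
  // hypothetical): with MaxGCPauseMillis=200, GCPauseIntervalMillis=1000,
  // G1MarkingOverheadPercent=10 and 8 processors, overall_cm_overhead is
  // 200 * 0.10 / 1000 = 0.02 and cpu_ratio is 1/8 = 0.125, so a single
  // marking thread is chosen (ceil(0.02 / 0.125) = 1). Its task overhead is
  // 0.02 / 1 * 8 = 0.16, giving a sleep factor of (1 - 0.16) / 0.16 = 5.25:
  // after each marking step the thread sleeps for roughly 5.25 times the
  // time it just spent marking.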

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
                   (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

  _parallel_workers = new WorkGang("G1 Marker",
       _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                       CardTableModRefBS::card_shift);
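
  // Illustrative biasing example (card size and heap base are assumptions):
  // with the usual 512-byte cards (card_shift == 9) and a heap reserved at
  // 0x0000000080000000, _heap_bottom_card_num is 0x80000000 >> 9 = 0x400000.
  // A heap address is mapped into the counting card bitmaps by shifting it
  // right by card_shift and subtracting this bias, so the heap's first card
  // corresponds to bit 0 of each per-worker bitmap.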

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
864 * 865 * Note, however, that this code is also used during remark and in 866 * this case we should not attempt to leave / enter the STS, otherwise 867 * we'll either hit an assert (debug / fastdebug) or deadlock 868 * (product). So we should only leave / enter the STS if we are 869 * operating concurrently. 870 * 871 * Because the thread that does the sync barrier has left the STS, it 872 * is possible to be suspended for a Full GC or an evacuation pause 873 * could occur. This is actually safe, since the entering the sync 874 * barrier is one of the last things do_marking_step() does, and it 875 * doesn't manipulate any data structures afterwards. 876 */ 877 878 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 879 bool barrier_aborted; 880 { 881 SuspendibleThreadSetLeaver sts_leave(concurrent()); 882 barrier_aborted = !_first_overflow_barrier_sync.enter(); 883 } 884 885 // at this point everyone should have synced up and not be doing any 886 // more work 887 888 if (barrier_aborted) { 889 // If the barrier aborted we ignore the overflow condition and 890 // just abort the whole marking phase as quickly as possible. 891 return; 892 } 893 894 // If we're executing the concurrent phase of marking, reset the marking 895 // state; otherwise the marking state is reset after reference processing, 896 // during the remark pause. 897 // If we reset here as a result of an overflow during the remark we will 898 // see assertion failures from any subsequent set_concurrency_and_phase() 899 // calls. 900 if (concurrent()) { 901 // let the task associated with with worker 0 do this 902 if (worker_id == 0) { 903 // task 0 is responsible for clearing the global data structures 904 // We should be here because of an overflow. During STW we should 905 // not clear the overflow flag since we rely on it being true when 906 // we exit this method to abort the pause and restart concurrent 907 // marking. 
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      log_info(gc)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    log_develop(gc)("Remark led to restart for overflow.");

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  void set_bit_for_region(HeapRegion* hr) {
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    _region_bm->par_at_put(index, true);
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // This will happen if we are handling a humongous object that spans
      // several heap regions.
      if (obj_end > hr->end()) {
        break;
      }
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    if (exp_marked_bytes > act_marked_bytes) {
      if (hr->is_starts_humongous()) {
        // For start_humongous regions, the size of the whole object will be
        // in exp_marked_bytes.
        HeapRegion* region = hr;
        int num_regions;
        for (num_regions = 0; region != NULL; num_regions++) {
          region = _g1h->next_region_in_humongous(region);
        }
        if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
          failures += 1;
        } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
          failures += 1;
        }
      } else {
        // We're not OK if expected marked bytes > actual marked bytes. It means
        // we have missed accounting some objects during the actual marking.
        failures += 1;
      }
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        failures += 1;
      }
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;

  HeapRegionClaimer _hrclaimer;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
 public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             end_idx, _card_bm->size());
      assert(start_idx < _card_bm->size(),
             "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             start_idx, _card_bm->size());

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
  }
};

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HeapRegionSetCount _old_regions_removed;
  HeapRegionSetCount _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(),
_humongous_regions_removed(), 1592 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1593 1594 size_t freed_bytes() { return _freed_bytes; } 1595 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1596 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1597 1598 bool doHeapRegion(HeapRegion *hr) { 1599 if (hr->is_archive()) { 1600 return false; 1601 } 1602 // We use a claim value of zero here because all regions 1603 // were claimed with value 1 in the FinalCount task. 1604 _g1->reset_gc_time_stamps(hr); 1605 hr->note_end_of_marking(); 1606 1607 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1608 _freed_bytes += hr->used(); 1609 hr->set_containing_set(NULL); 1610 if (hr->is_humongous()) { 1611 _humongous_regions_removed.increment(1u, hr->capacity()); 1612 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1613 } else { 1614 _old_regions_removed.increment(1u, hr->capacity()); 1615 _g1->free_region(hr, _local_cleanup_list, true); 1616 } 1617 } else { 1618 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1619 } 1620 1621 return false; 1622 } 1623 }; 1624 1625 class G1ParNoteEndTask: public AbstractGangTask { 1626 friend class G1NoteEndOfConcMarkClosure; 1627 1628 protected: 1629 G1CollectedHeap* _g1h; 1630 FreeRegionList* _cleanup_list; 1631 HeapRegionClaimer _hrclaimer; 1632 1633 public: 1634 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1635 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1636 } 1637 1638 void work(uint worker_id) { 1639 FreeRegionList local_cleanup_list("Local Cleanup List"); 1640 HRRSCleanupTask hrrs_cleanup_task; 1641 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1642 &hrrs_cleanup_task); 1643 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1644 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1645 1646 // Now update the lists 1647 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1648 { 1649 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1650 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1651 1652 // If we iterate over the global cleanup list at the end of 1653 // cleanup to do this printing we will not guarantee to only 1654 // generate output for the newly-reclaimed regions (the list 1655 // might not be empty at the beginning of cleanup; we might 1656 // still be working on its previous contents). So we do the 1657 // printing here, before we append the new regions to the global 1658 // cleanup list. 
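// (Note that each worker frees regions into its own local_cleanup_list
// first and only appends that list to the global _cleanup_list afterwards,
// so iterating the local list here covers exactly the regions reclaimed
// by this worker in this invocation.)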
1659 1660 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1661 if (hr_printer->is_active()) { 1662 FreeRegionListIterator iter(&local_cleanup_list); 1663 while (iter.more_available()) { 1664 HeapRegion* hr = iter.get_next(); 1665 hr_printer->cleanup(hr); 1666 } 1667 } 1668 1669 _cleanup_list->add_ordered(&local_cleanup_list); 1670 assert(local_cleanup_list.is_empty(), "post-condition"); 1671 1672 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1673 } 1674 } 1675 }; 1676 1677 class G1ParScrubRemSetTask: public AbstractGangTask { 1678 protected: 1679 G1RemSet* _g1rs; 1680 BitMap* _region_bm; 1681 BitMap* _card_bm; 1682 HeapRegionClaimer _hrclaimer; 1683 1684 public: 1685 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1686 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1687 } 1688 1689 void work(uint worker_id) { 1690 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1691 } 1692 1693 }; 1694 1695 void ConcurrentMark::cleanup() { 1696 // world is stopped at this checkpoint 1697 assert(SafepointSynchronize::is_at_safepoint(), 1698 "world should be stopped"); 1699 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1700 1701 // If a full collection has happened, we shouldn't do this. 1702 if (has_aborted()) { 1703 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1704 return; 1705 } 1706 1707 g1h->verify_region_sets_optional(); 1708 1709 if (VerifyDuringGC) { 1710 HandleMark hm; // handle scope 1711 g1h->prepare_for_verify(); 1712 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1713 } 1714 g1h->check_bitmaps("Cleanup Start"); 1715 1716 G1CollectorPolicy* g1p = g1h->g1_policy(); 1717 g1p->record_concurrent_mark_cleanup_start(); 1718 1719 double start = os::elapsedTime(); 1720 1721 HeapRegionRemSet::reset_for_cleanup_tasks(); 1722 1723 // Do counting once more with the world stopped for good measure. 1724 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1725 1726 g1h->workers()->run_task(&g1_par_count_task); 1727 1728 if (VerifyDuringGC) { 1729 // Verify that the counting data accumulated during marking matches 1730 // that calculated by walking the marking bitmap. 1731 1732 // Bitmaps to hold expected values 1733 BitMap expected_region_bm(_region_bm.size(), true); 1734 BitMap expected_card_bm(_card_bm.size(), true); 1735 1736 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1737 &_region_bm, 1738 &_card_bm, 1739 &expected_region_bm, 1740 &expected_card_bm); 1741 1742 g1h->workers()->run_task(&g1_par_verify_task); 1743 1744 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1745 } 1746 1747 size_t start_used_bytes = g1h->used(); 1748 g1h->collector_state()->set_mark_in_progress(false); 1749 1750 double count_end = os::elapsedTime(); 1751 double this_final_counting_time = (count_end - start); 1752 _total_counting_time += this_final_counting_time; 1753 1754 if (Log<LOG_TAGS(gc, liveness)>::is_trace()) { 1755 G1PrintRegionLivenessInfoClosure cl("Post-Marking"); 1756 _g1h->heap_region_iterate(&cl); 1757 } 1758 1759 // Install newly created mark bitMap as "prev". 1760 swapMarkBitMaps(); 1761 1762 g1h->reset_gc_time_stamp(); 1763 1764 uint n_workers = _g1h->workers()->active_workers(); 1765 1766 // Note end of marking in all heap regions. 
1767 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); 1768 g1h->workers()->run_task(&g1_par_note_end_task); 1769 g1h->check_gc_time_stamps(); 1770 1771 if (!cleanup_list_is_empty()) { 1772 // The cleanup list is not empty, so we'll have to process it 1773 // concurrently. Notify anyone else that might be wanting free 1774 // regions that there will be more free regions coming soon. 1775 g1h->set_free_regions_coming(); 1776 } 1777 1778 // call below, since it affects the metric by which we sort the heap 1779 // regions. 1780 if (G1ScrubRemSets) { 1781 double rs_scrub_start = os::elapsedTime(); 1782 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers); 1783 g1h->workers()->run_task(&g1_par_scrub_rs_task); 1784 1785 double rs_scrub_end = os::elapsedTime(); 1786 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 1787 _total_rs_scrub_time += this_rs_scrub_time; 1788 } 1789 1790 // this will also free any regions totally full of garbage objects, 1791 // and sort the regions. 1792 g1h->g1_policy()->record_concurrent_mark_cleanup_end(); 1793 1794 // Statistics. 1795 double end = os::elapsedTime(); 1796 _cleanup_times.add((end - start) * 1000.0); 1797 1798 // Clean up will have freed any regions completely full of garbage. 1799 // Update the soft reference policy with the new heap occupancy. 1800 Universe::update_heap_info_at_gc(); 1801 1802 if (VerifyDuringGC) { 1803 HandleMark hm; // handle scope 1804 g1h->prepare_for_verify(); 1805 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)"); 1806 } 1807 1808 g1h->check_bitmaps("Cleanup End"); 1809 1810 g1h->verify_region_sets_optional(); 1811 1812 // We need to make this be a "collection" so any collection pause that 1813 // races with it goes around and waits for completeCleanup to finish. 1814 g1h->increment_total_collections(); 1815 1816 // Clean out dead classes and update Metaspace sizes. 1817 if (ClassUnloadingWithConcurrentMark) { 1818 ClassLoaderDataGraph::purge(); 1819 } 1820 MetaspaceGC::compute_new_size(); 1821 1822 // We reclaimed old regions so we should calculate the sizes to make 1823 // sure we update the old gen/space data. 1824 g1h->g1mm()->update_sizes(); 1825 g1h->allocation_context_stats().update_after_mark(); 1826 1827 g1h->trace_heap_after_concurrent_cycle(); 1828 } 1829 1830 void ConcurrentMark::completeCleanup() { 1831 if (has_aborted()) return; 1832 1833 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1834 1835 _cleanup_list.verify_optional(); 1836 FreeRegionList tmp_free_list("Tmp Free List"); 1837 1838 log_develop(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : " 1839 "cleanup list has %u entries", 1840 _cleanup_list.length()); 1841 1842 // No one else should be accessing the _cleanup_list at this point, 1843 // so it is not necessary to take any locks 1844 while (!_cleanup_list.is_empty()) { 1845 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 1846 assert(hr != NULL, "Got NULL from a non-empty list"); 1847 hr->par_clear(); 1848 tmp_free_list.add_ordered(hr); 1849 1850 // Instead of adding one region at a time to the secondary_free_list, 1851 // we accumulate them in the local list and move them a few at a 1852 // time. This also cuts down on the number of notify_all() calls 1853 // we do during this process. We'll also append the local list when 1854 // _cleanup_list is empty (which means we just removed the last 1855 // region from the _cleanup_list). 
1856 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 1857 _cleanup_list.is_empty()) { 1858 log_develop(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : " 1859 "appending %u entries to the secondary_free_list, " 1860 "cleanup list still has %u entries", 1861 tmp_free_list.length(), 1862 _cleanup_list.length()); 1863 1864 { 1865 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 1866 g1h->secondary_free_list_add(&tmp_free_list); 1867 SecondaryFreeList_lock->notify_all(); 1868 } 1869 #ifndef PRODUCT 1870 if (G1StressConcRegionFreeing) { 1871 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 1872 os::sleep(Thread::current(), (jlong) 1, false); 1873 } 1874 } 1875 #endif 1876 } 1877 } 1878 assert(tmp_free_list.is_empty(), "post-condition"); 1879 } 1880 1881 // Supporting Object and Oop closures for reference discovery 1882 // and processing in during marking 1883 1884 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1885 HeapWord* addr = (HeapWord*)obj; 1886 return addr != NULL && 1887 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 1888 } 1889 1890 // 'Keep Alive' oop closure used by both serial parallel reference processing. 1891 // Uses the CMTask associated with a worker thread (for serial reference 1892 // processing the CMTask for worker 0 is used) to preserve (mark) and 1893 // trace referent objects. 1894 // 1895 // Using the CMTask and embedded local queues avoids having the worker 1896 // threads operating on the global mark stack. This reduces the risk 1897 // of overflowing the stack - which we would rather avoid at this late 1898 // state. Also using the tasks' local queues removes the potential 1899 // of the workers interfering with each other that could occur if 1900 // operating on the global stack. 1901 1902 class G1CMKeepAliveAndDrainClosure: public OopClosure { 1903 ConcurrentMark* _cm; 1904 CMTask* _task; 1905 int _ref_counter_limit; 1906 int _ref_counter; 1907 bool _is_serial; 1908 public: 1909 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 1910 _cm(cm), _task(task), _is_serial(is_serial), 1911 _ref_counter_limit(G1RefProcDrainInterval) { 1912 assert(_ref_counter_limit > 0, "sanity"); 1913 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1914 _ref_counter = _ref_counter_limit; 1915 } 1916 1917 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1918 virtual void do_oop( oop* p) { do_oop_work(p); } 1919 1920 template <class T> void do_oop_work(T* p) { 1921 if (!_cm->has_overflown()) { 1922 oop obj = oopDesc::load_decode_heap_oop(p); 1923 _task->deal_with_reference(obj); 1924 _ref_counter--; 1925 1926 if (_ref_counter == 0) { 1927 // We have dealt with _ref_counter_limit references, pushing them 1928 // and objects reachable from them on to the local stack (and 1929 // possibly the global stack). Call CMTask::do_marking_step() to 1930 // process these entries. 1931 // 1932 // We call CMTask::do_marking_step() in a loop, which we'll exit if 1933 // there's nothing more to do (i.e. we're done with the entries that 1934 // were pushed as a result of the CMTask::deal_with_reference() calls 1935 // above) or we overflow. 1936 // 1937 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 1938 // flag while there may still be some work to do. (See the comment at 1939 // the beginning of CMTask::do_marking_step() for those conditions - 1940 // one of which is reaching the specified time target.) 
It is only 1941 // when CMTask::do_marking_step() returns without setting the 1942 // has_aborted() flag that the marking step has completed. 1943 do { 1944 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1945 _task->do_marking_step(mark_step_duration_ms, 1946 false /* do_termination */, 1947 _is_serial); 1948 } while (_task->has_aborted() && !_cm->has_overflown()); 1949 _ref_counter = _ref_counter_limit; 1950 } 1951 } 1952 } 1953 }; 1954 1955 // 'Drain' oop closure used by both serial and parallel reference processing. 1956 // Uses the CMTask associated with a given worker thread (for serial 1957 // reference processing the CMTask for worker 0 is used). Calls the 1958 // do_marking_step routine, with an unbelievably large timeout value, 1959 // to drain the marking data structures of the remaining entries 1960 // added by the 'keep alive' oop closure above. 1961 1962 class G1CMDrainMarkingStackClosure: public VoidClosure { 1963 ConcurrentMark* _cm; 1964 CMTask* _task; 1965 bool _is_serial; 1966 public: 1967 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 1968 _cm(cm), _task(task), _is_serial(is_serial) { 1969 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1970 } 1971 1972 void do_void() { 1973 do { 1974 // We call CMTask::do_marking_step() to completely drain the local 1975 // and global marking stacks of entries pushed by the 'keep alive' 1976 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 1977 // 1978 // CMTask::do_marking_step() is called in a loop, which we'll exit 1979 // if there's nothing more to do (i.e. we've completely drained the 1980 // entries that were pushed as a result of applying the 'keep alive' 1981 // closure to the entries on the discovered ref lists) or we overflow 1982 // the global marking stack. 1983 // 1984 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 1985 // flag while there may still be some work to do. (See the comment at 1986 // the beginning of CMTask::do_marking_step() for those conditions - 1987 // one of which is reaching the specified time target.) It is only 1988 // when CMTask::do_marking_step() returns without setting the 1989 // has_aborted() flag that the marking step has completed. 1990 1991 _task->do_marking_step(1000000000.0 /* something very large */, 1992 true /* do_termination */, 1993 _is_serial); 1994 } while (_task->has_aborted() && !_cm->has_overflown()); 1995 } 1996 }; 1997 1998 // Implementation of AbstractRefProcTaskExecutor for parallel 1999 // reference processing at the end of G1 concurrent marking 2000 2001 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2002 private: 2003 G1CollectedHeap* _g1h; 2004 ConcurrentMark* _cm; 2005 WorkGang* _workers; 2006 uint _active_workers; 2007 2008 public: 2009 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2010 ConcurrentMark* cm, 2011 WorkGang* workers, 2012 uint n_workers) : 2013 _g1h(g1h), _cm(cm), 2014 _workers(workers), _active_workers(n_workers) { } 2015 2016 // Executes the given task using concurrent marking worker threads.
2017 virtual void execute(ProcessTask& task); 2018 virtual void execute(EnqueueTask& task); 2019 }; 2020 2021 class G1CMRefProcTaskProxy: public AbstractGangTask { 2022 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2023 ProcessTask& _proc_task; 2024 G1CollectedHeap* _g1h; 2025 ConcurrentMark* _cm; 2026 2027 public: 2028 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2029 G1CollectedHeap* g1h, 2030 ConcurrentMark* cm) : 2031 AbstractGangTask("Process reference objects in parallel"), 2032 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2033 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2034 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2035 } 2036 2037 virtual void work(uint worker_id) { 2038 ResourceMark rm; 2039 HandleMark hm; 2040 CMTask* task = _cm->task(worker_id); 2041 G1CMIsAliveClosure g1_is_alive(_g1h); 2042 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2043 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2044 2045 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2046 } 2047 }; 2048 2049 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2050 assert(_workers != NULL, "Need parallel worker threads."); 2051 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2052 2053 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2054 2055 // We need to reset the concurrency level before each 2056 // proxy task execution, so that the termination protocol 2057 // and overflow handling in CMTask::do_marking_step() knows 2058 // how many workers to wait for. 2059 _cm->set_concurrency(_active_workers); 2060 _workers->run_task(&proc_task_proxy); 2061 } 2062 2063 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2064 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2065 EnqueueTask& _enq_task; 2066 2067 public: 2068 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2069 AbstractGangTask("Enqueue reference objects in parallel"), 2070 _enq_task(enq_task) { } 2071 2072 virtual void work(uint worker_id) { 2073 _enq_task.work(worker_id); 2074 } 2075 }; 2076 2077 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2078 assert(_workers != NULL, "Need parallel worker threads."); 2079 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2080 2081 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2082 2083 // Not strictly necessary but... 2084 // 2085 // We need to reset the concurrency level before each 2086 // proxy task execution, so that the termination protocol 2087 // and overflow handling in CMTask::do_marking_step() knows 2088 // how many workers to wait for. 2089 _cm->set_concurrency(_active_workers); 2090 _workers->run_task(&enq_task_proxy); 2091 } 2092 2093 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2094 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2095 } 2096 2097 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2098 if (has_overflown()) { 2099 // Skip processing the discovered references if we have 2100 // overflown the global marking stack. Reference objects 2101 // only get discovered once so it is OK to not 2102 // de-populate the discovered reference lists. We could have, 2103 // but the only benefit would be that, when marking restarts, 2104 // less reference objects are discovered. 
2105 return; 2106 } 2107 2108 ResourceMark rm; 2109 HandleMark hm; 2110 2111 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2112 2113 // Is alive closure. 2114 G1CMIsAliveClosure g1_is_alive(g1h); 2115 2116 // Inner scope to exclude the cleaning of the string and symbol 2117 // tables from the displayed time. 2118 { 2119 GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm()); 2120 2121 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2122 2123 // See the comment in G1CollectedHeap::ref_processing_init() 2124 // about how reference processing currently works in G1. 2125 2126 // Set the soft reference policy 2127 rp->setup_policy(clear_all_soft_refs); 2128 assert(_markStack.isEmpty(), "mark stack should be empty"); 2129 2130 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2131 // in serial reference processing. Note these closures are also 2132 // used for serially processing (by the the current thread) the 2133 // JNI references during parallel reference processing. 2134 // 2135 // These closures do not need to synchronize with the worker 2136 // threads involved in parallel reference processing as these 2137 // instances are executed serially by the current thread (e.g. 2138 // reference processing is not multi-threaded and is thus 2139 // performed by the current thread instead of a gang worker). 2140 // 2141 // The gang tasks involved in parallel reference processing create 2142 // their own instances of these closures, which do their own 2143 // synchronization among themselves. 2144 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2145 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2146 2147 // We need at least one active thread. If reference processing 2148 // is not multi-threaded we use the current (VMThread) thread, 2149 // otherwise we use the work gang from the G1CollectedHeap and 2150 // we utilize all the worker threads we can. 2151 bool processing_is_mt = rp->processing_is_mt(); 2152 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2153 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2154 2155 // Parallel processing task executor. 2156 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2157 g1h->workers(), active_workers); 2158 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2159 2160 // Set the concurrency level. The phase was already set prior to 2161 // executing the remark task. 2162 set_concurrency(active_workers); 2163 2164 // Set the degree of MT processing here. If the discovery was done MT, 2165 // the number of threads involved during discovery could differ from 2166 // the number of active workers. This is OK as long as the discovered 2167 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2168 rp->set_active_mt_degree(active_workers); 2169 2170 // Process the weak references. 2171 const ReferenceProcessorStats& stats = 2172 rp->process_discovered_references(&g1_is_alive, 2173 &g1_keep_alive, 2174 &g1_drain_mark_stack, 2175 executor, 2176 g1h->gc_timer_cm()); 2177 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2178 2179 // The do_oop work routines of the keep_alive and drain_marking_stack 2180 // oop closures will set the has_overflown flag if we overflow the 2181 // global marking stack. 
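// If that has happened we note it below and bail out of the rest of this
// method further down, since g1_is_alive can no longer be trusted once
// the marking stack has overflowed.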
2182 2183 assert(_markStack.overflow() || _markStack.isEmpty(), 2184 "mark stack should be empty (unless it overflowed)"); 2185 2186 if (_markStack.overflow()) { 2187 // This should have been done already when we tried to push an 2188 // entry on to the global mark stack. But let's do it again. 2189 set_has_overflown(); 2190 } 2191 2192 assert(rp->num_q() == active_workers, "why not"); 2193 2194 rp->enqueue_discovered_references(executor); 2195 2196 rp->verify_no_references_recorded(); 2197 assert(!rp->discovery_enabled(), "Post condition"); 2198 } 2199 2200 if (has_overflown()) { 2201 // We can not trust g1_is_alive if the marking stack overflowed 2202 return; 2203 } 2204 2205 assert(_markStack.isEmpty(), "Marking should have completed"); 2206 2207 // Unload Klasses, String, Symbols, Code Cache, etc. 2208 { 2209 GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm()); 2210 2211 if (ClassUnloadingWithConcurrentMark) { 2212 bool purged_classes; 2213 2214 { 2215 GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm()); 2216 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2217 } 2218 2219 { 2220 GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm()); 2221 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2222 } 2223 } 2224 2225 if (G1StringDedup::is_enabled()) { 2226 GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm()); 2227 G1StringDedup::unlink(&g1_is_alive); 2228 } 2229 } 2230 } 2231 2232 void ConcurrentMark::swapMarkBitMaps() { 2233 CMBitMapRO* temp = _prevMarkBitMap; 2234 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2235 _nextMarkBitMap = (CMBitMap*) temp; 2236 } 2237 2238 // Closure for marking entries in SATB buffers. 2239 class CMSATBBufferClosure : public SATBBufferClosure { 2240 private: 2241 CMTask* _task; 2242 G1CollectedHeap* _g1h; 2243 2244 // This is very similar to CMTask::deal_with_reference, but with 2245 // more relaxed requirements for the argument, so this must be more 2246 // circumspect about treating the argument as an object. 2247 void do_entry(void* entry) const { 2248 _task->increment_refs_reached(); 2249 HeapRegion* hr = _g1h->heap_region_containing(entry); 2250 if (entry < hr->next_top_at_mark_start()) { 2251 // Until we get here, we don't know whether entry refers to a valid 2252 // object; it could instead have been a stale reference. 
oop obj = static_cast<oop>(entry); 2254 assert(obj->is_oop(true /* ignore mark word */), 2255 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)); 2256 _task->make_reference_grey(obj, hr); 2257 } 2258 } 2259 2260 public: 2261 CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h) 2262 : _task(task), _g1h(g1h) { } 2263 2264 virtual void do_buffer(void** buffer, size_t size) { 2265 for (size_t i = 0; i < size; ++i) { 2266 do_entry(buffer[i]); 2267 } 2268 } 2269 }; 2270 2271 class G1RemarkThreadsClosure : public ThreadClosure { 2272 CMSATBBufferClosure _cm_satb_cl; 2273 G1CMOopClosure _cm_cl; 2274 MarkingCodeBlobClosure _code_cl; 2275 int _thread_parity; 2276 2277 public: 2278 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) : 2279 _cm_satb_cl(task, g1h), 2280 _cm_cl(g1h, g1h->concurrent_mark(), task), 2281 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 2282 _thread_parity(Threads::thread_claim_parity()) {} 2283 2284 void do_thread(Thread* thread) { 2285 if (thread->is_Java_thread()) { 2286 if (thread->claim_oops_do(true, _thread_parity)) { 2287 JavaThread* jt = (JavaThread*)thread; 2288 2289 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking; 2290 // however, oops reachable from nmethods have very complex lifecycles: 2291 // * Alive if on the stack of an executing method 2292 // * Weakly reachable otherwise 2293 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 2294 // live by the SATB invariant, but other oops recorded in nmethods may behave differently. 2295 jt->nmethods_do(&_code_cl); 2296 2297 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 2298 } 2299 } else if (thread->is_VM_thread()) { 2300 if (thread->claim_oops_do(true, _thread_parity)) { 2301 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 2302 } 2303 } 2304 } 2305 }; 2306 2307 class CMRemarkTask: public AbstractGangTask { 2308 private: 2309 ConcurrentMark* _cm; 2310 public: 2311 void work(uint worker_id) { 2312 // Since all available tasks are actually started, we should 2313 // only proceed if we're supposed to be active. 2314 if (worker_id < _cm->active_tasks()) { 2315 CMTask* task = _cm->task(worker_id); 2316 task->record_start_time(); 2317 { 2318 ResourceMark rm; 2319 HandleMark hm; 2320 2321 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 2322 Threads::threads_do(&threads_f); 2323 } 2324 2325 do { 2326 task->do_marking_step(1000000000.0 /* something very large */, 2327 true /* do_termination */, 2328 false /* is_serial */); 2329 } while (task->has_aborted() && !_cm->has_overflown()); 2330 // If we overflow, then we do not want to restart. We instead 2331 // want to abort remark and do concurrent marking again.
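// Note that during remark the regular clock (see regular_clock_call())
// only aborts the step on a global mark stack overflow - the yield, SATB
// and time target checks apply to the concurrent phase only - and the
// time target passed above is effectively unlimited. So this loop exits
// either because the task has finished its share of the marking or
// because marking has to be redone after an overflow.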
2332 task->record_end_time(); 2333 } 2334 } 2335 2336 CMRemarkTask(ConcurrentMark* cm, uint active_workers) : 2337 AbstractGangTask("Par Remark"), _cm(cm) { 2338 _cm->terminator()->reset_for_reuse(active_workers); 2339 } 2340 }; 2341 2342 void ConcurrentMark::checkpointRootsFinalWork() { 2343 ResourceMark rm; 2344 HandleMark hm; 2345 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2346 2347 GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm()); 2348 2349 g1h->ensure_parsability(false); 2350 2351 // this is remark, so we'll use up all active threads 2352 uint active_workers = g1h->workers()->active_workers(); 2353 set_concurrency_and_phase(active_workers, false /* concurrent */); 2354 // Leave _parallel_marking_threads at it's 2355 // value originally calculated in the ConcurrentMark 2356 // constructor and pass values of the active workers 2357 // through the gang in the task. 2358 2359 { 2360 StrongRootsScope srs(active_workers); 2361 2362 CMRemarkTask remarkTask(this, active_workers); 2363 // We will start all available threads, even if we decide that the 2364 // active_workers will be fewer. The extra ones will just bail out 2365 // immediately. 2366 g1h->workers()->run_task(&remarkTask); 2367 } 2368 2369 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2370 guarantee(has_overflown() || 2371 satb_mq_set.completed_buffers_num() == 0, 2372 "Invariant: has_overflown = %s, num buffers = %d", 2373 BOOL_TO_STR(has_overflown()), 2374 satb_mq_set.completed_buffers_num()); 2375 2376 print_stats(); 2377 } 2378 2379 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2380 // Note we are overriding the read-only view of the prev map here, via 2381 // the cast. 2382 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2383 } 2384 2385 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2386 _nextMarkBitMap->clearRange(mr); 2387 } 2388 2389 HeapRegion* 2390 ConcurrentMark::claim_region(uint worker_id) { 2391 // "checkpoint" the finger 2392 HeapWord* finger = _finger; 2393 2394 // _heap_end will not change underneath our feet; it only changes at 2395 // yield points. 2396 while (finger < _heap_end) { 2397 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2398 2399 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 2400 2401 // Above heap_region_containing may return NULL as we always scan claim 2402 // until the end of the heap. In this case, just jump to the next region. 2403 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 2404 2405 // Is the gap between reading the finger and doing the CAS too long? 2406 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2407 if (res == finger && curr_region != NULL) { 2408 // we succeeded 2409 HeapWord* bottom = curr_region->bottom(); 2410 HeapWord* limit = curr_region->next_top_at_mark_start(); 2411 2412 // notice that _finger == end cannot be guaranteed here since, 2413 // someone else might have moved the finger even further 2414 assert(_finger >= end, "the finger should have moved forward"); 2415 2416 if (limit > bottom) { 2417 return curr_region; 2418 } else { 2419 assert(limit == bottom, 2420 "the region limit should be at bottom"); 2421 // we return NULL and the caller should try calling 2422 // claim_region() again. 
2423 return NULL; 2424 } 2425 } else { 2426 assert(_finger > finger, "the finger should have moved forward"); 2427 // read it again 2428 finger = _finger; 2429 } 2430 } 2431 2432 return NULL; 2433 } 2434 2435 #ifndef PRODUCT 2436 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2437 private: 2438 G1CollectedHeap* _g1h; 2439 const char* _phase; 2440 int _info; 2441 2442 public: 2443 VerifyNoCSetOops(const char* phase, int info = -1) : 2444 _g1h(G1CollectedHeap::heap()), 2445 _phase(phase), 2446 _info(info) 2447 { } 2448 2449 void operator()(oop obj) const { 2450 guarantee(obj->is_oop(), 2451 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2452 p2i(obj), _phase, _info); 2453 guarantee(!_g1h->obj_in_cs(obj), 2454 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2455 p2i(obj), _phase, _info); 2456 } 2457 }; 2458 2459 void ConcurrentMark::verify_no_cset_oops() { 2460 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2461 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2462 return; 2463 } 2464 2465 // Verify entries on the global mark stack 2466 _markStack.iterate(VerifyNoCSetOops("Stack")); 2467 2468 // Verify entries on the task queues 2469 for (uint i = 0; i < _max_worker_id; ++i) { 2470 CMTaskQueue* queue = _task_queues->queue(i); 2471 queue->iterate(VerifyNoCSetOops("Queue", i)); 2472 } 2473 2474 // Verify the global finger 2475 HeapWord* global_finger = finger(); 2476 if (global_finger != NULL && global_finger < _heap_end) { 2477 // Since we always iterate over all regions, we might get a NULL HeapRegion 2478 // here. 2479 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2480 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2481 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2482 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2483 } 2484 2485 // Verify the task fingers 2486 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2487 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2488 CMTask* task = _tasks[i]; 2489 HeapWord* task_finger = task->finger(); 2490 if (task_finger != NULL && task_finger < _heap_end) { 2491 // See above note on the global finger verification. 2492 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2493 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2494 !task_hr->in_collection_set(), 2495 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2496 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2497 } 2498 } 2499 } 2500 #endif // PRODUCT 2501 2502 // Aggregate the counting data that was constructed concurrently 2503 // with marking. 
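// During marking each worker accumulates liveness data privately: a
// per-worker card bitmap plus a per-worker array of marked bytes indexed
// by region. The closure below folds that data into the global card
// bitmap and into each region's marked-bytes total by OR-ing the card
// bits and summing the byte counts across all workers. As a rough
// illustration (assuming the default 512 byte cards), a 1M region spans
// 2048 cards and therefore occupies a 2048 bit stripe in each bitmap.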
2504 class AggregateCountDataHRClosure: public HeapRegionClosure { 2505 G1CollectedHeap* _g1h; 2506 ConcurrentMark* _cm; 2507 CardTableModRefBS* _ct_bs; 2508 BitMap* _cm_card_bm; 2509 uint _max_worker_id; 2510 2511 public: 2512 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2513 BitMap* cm_card_bm, 2514 uint max_worker_id) : 2515 _g1h(g1h), _cm(g1h->concurrent_mark()), 2516 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())), 2517 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 2518 2519 bool doHeapRegion(HeapRegion* hr) { 2520 HeapWord* start = hr->bottom(); 2521 HeapWord* limit = hr->next_top_at_mark_start(); 2522 HeapWord* end = hr->end(); 2523 2524 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2525 "Preconditions not met - " 2526 "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", " 2527 "top: " PTR_FORMAT ", end: " PTR_FORMAT, 2528 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())); 2529 2530 assert(hr->next_marked_bytes() == 0, "Precondition"); 2531 2532 if (start == limit) { 2533 // NTAMS of this region has not been set so nothing to do. 2534 return false; 2535 } 2536 2537 // 'start' should be in the heap. 2538 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 2539 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 2540 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 2541 2542 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 2543 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 2544 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 2545 2546 // If ntams is not card aligned then we bump card bitmap index 2547 // for limit so that we get the all the cards spanned by 2548 // the object ending at ntams. 2549 // Note: if this is the last region in the heap then ntams 2550 // could be actually just beyond the end of the the heap; 2551 // limit_idx will then correspond to a (non-existent) card 2552 // that is also outside the heap. 2553 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 2554 limit_idx += 1; 2555 } 2556 2557 assert(limit_idx <= end_idx, "or else use atomics"); 2558 2559 // Aggregate the "stripe" in the count data associated with hr. 2560 uint hrm_index = hr->hrm_index(); 2561 size_t marked_bytes = 0; 2562 2563 for (uint i = 0; i < _max_worker_id; i += 1) { 2564 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 2565 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 2566 2567 // Fetch the marked_bytes in this region for task i and 2568 // add it to the running total for this region. 2569 marked_bytes += marked_bytes_array[hrm_index]; 2570 2571 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 2572 // into the global card bitmap. 2573 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 2574 2575 while (scan_idx < limit_idx) { 2576 assert(task_card_bm->at(scan_idx) == true, "should be"); 2577 _cm_card_bm->set_bit(scan_idx); 2578 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 2579 2580 // BitMap::get_next_one_offset() can handle the case when 2581 // its left_offset parameter is greater than its right_offset 2582 // parameter. It does, however, have an early exit if 2583 // left_offset == right_offset. So let's limit the value 2584 // passed in for left offset here. 
2585 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2586 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2587 } 2588 } 2589 2590 // Update the marked bytes for this region. 2591 hr->add_to_marked_bytes(marked_bytes); 2592 2593 // Next heap region 2594 return false; 2595 } 2596 }; 2597 2598 class G1AggregateCountDataTask: public AbstractGangTask { 2599 protected: 2600 G1CollectedHeap* _g1h; 2601 ConcurrentMark* _cm; 2602 BitMap* _cm_card_bm; 2603 uint _max_worker_id; 2604 uint _active_workers; 2605 HeapRegionClaimer _hrclaimer; 2606 2607 public: 2608 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2609 ConcurrentMark* cm, 2610 BitMap* cm_card_bm, 2611 uint max_worker_id, 2612 uint n_workers) : 2613 AbstractGangTask("Count Aggregation"), 2614 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2615 _max_worker_id(max_worker_id), 2616 _active_workers(n_workers), 2617 _hrclaimer(_active_workers) { 2618 } 2619 2620 void work(uint worker_id) { 2621 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2622 2623 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2624 } 2625 }; 2626 2627 2628 void ConcurrentMark::aggregate_count_data() { 2629 uint n_workers = _g1h->workers()->active_workers(); 2630 2631 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2632 _max_worker_id, n_workers); 2633 2634 _g1h->workers()->run_task(&g1_par_agg_task); 2635 } 2636 2637 // Clear the per-worker arrays used to store the per-region counting data 2638 void ConcurrentMark::clear_all_count_data() { 2639 // Clear the global card bitmap - it will be filled during 2640 // liveness count aggregation (during remark) and the 2641 // final counting task. 2642 _card_bm.clear(); 2643 2644 // Clear the global region bitmap - it will be filled as part 2645 // of the final counting task. 2646 _region_bm.clear(); 2647 2648 uint max_regions = _g1h->max_regions(); 2649 assert(_max_worker_id > 0, "uninitialized"); 2650 2651 for (uint i = 0; i < _max_worker_id; i += 1) { 2652 BitMap* task_card_bm = count_card_bitmap_for(i); 2653 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2654 2655 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2656 assert(marked_bytes_array != NULL, "uninitialized"); 2657 2658 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2659 task_card_bm->clear(); 2660 } 2661 } 2662 2663 void ConcurrentMark::print_stats() { 2664 if (!log_is_enabled(Debug, gc, stats)) { 2665 return; 2666 } 2667 log_debug(gc, stats)("---------------------------------------------------------------------"); 2668 for (size_t i = 0; i < _active_tasks; ++i) { 2669 _tasks[i]->print_stats(); 2670 log_debug(gc, stats)("---------------------------------------------------------------------"); 2671 } 2672 } 2673 2674 // abandon current marking iteration due to a Full GC 2675 void ConcurrentMark::abort() { 2676 if (!cmThread()->during_cycle() || _has_aborted) { 2677 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2678 return; 2679 } 2680 2681 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2682 // concurrent bitmap clearing. 2683 _nextMarkBitMap->clearAll(); 2684 2685 // Note we cannot clear the previous marking bitmap here 2686 // since VerifyDuringGC verifies the objects marked during 2687 // a full GC against the previous bitmap. 
2688 2689 // Clear the liveness counting data 2690 clear_all_count_data(); 2691 // Empty mark stack 2692 reset_marking_state(); 2693 for (uint i = 0; i < _max_worker_id; ++i) { 2694 _tasks[i]->clear_region_fields(); 2695 } 2696 _first_overflow_barrier_sync.abort(); 2697 _second_overflow_barrier_sync.abort(); 2698 _has_aborted = true; 2699 2700 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2701 satb_mq_set.abandon_partial_marking(); 2702 // This can be called either during or outside marking, we'll read 2703 // the expected_active value from the SATB queue set. 2704 satb_mq_set.set_active_all_threads( 2705 false, /* new active value */ 2706 satb_mq_set.is_active() /* expected_active */); 2707 2708 _g1h->trace_heap_after_concurrent_cycle(); 2709 _g1h->register_concurrent_cycle_end(); 2710 } 2711 2712 static void print_ms_time_info(const char* prefix, const char* name, 2713 NumberSeq& ns) { 2714 log_trace(gc, marking, stats, exit)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2715 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2716 if (ns.num() > 0) { 2717 log_trace(gc, marking, stats, exit)("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2718 prefix, ns.sd(), ns.maximum()); 2719 } 2720 } 2721 2722 void ConcurrentMark::print_summary_info() { 2723 LogHandle(gc, marking, stats, exit) log; 2724 if (!log.is_trace()) { 2725 return; 2726 } 2727 2728 log.trace(" Concurrent marking:"); 2729 print_ms_time_info(" ", "init marks", _init_times); 2730 print_ms_time_info(" ", "remarks", _remark_times); 2731 { 2732 print_ms_time_info(" ", "final marks", _remark_mark_times); 2733 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2734 2735 } 2736 print_ms_time_info(" ", "cleanups", _cleanup_times); 2737 log.trace(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2738 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2739 if (G1ScrubRemSets) { 2740 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2741 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2742 } 2743 log.trace(" Total stop_world time = %8.2f s.", 2744 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2745 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2746 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2747 } 2748 2749 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2750 _parallel_workers->print_worker_threads_on(st); 2751 } 2752 2753 void ConcurrentMark::print_on_error(outputStream* st) const { 2754 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2755 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2756 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2757 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2758 } 2759 2760 // We take a break if someone is trying to stop the world. 
2761 bool ConcurrentMark::do_yield_check(uint worker_id) { 2762 if (SuspendibleThreadSet::should_yield()) { 2763 if (worker_id == 0) { 2764 _g1h->g1_policy()->record_concurrent_pause(); 2765 } 2766 SuspendibleThreadSet::yield(); 2767 return true; 2768 } else { 2769 return false; 2770 } 2771 } 2772 2773 // Closure for iteration over bitmaps 2774 class CMBitMapClosure : public BitMapClosure { 2775 private: 2776 // the bitmap that is being iterated over 2777 CMBitMap* _nextMarkBitMap; 2778 ConcurrentMark* _cm; 2779 CMTask* _task; 2780 2781 public: 2782 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 2783 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2784 2785 bool do_bit(size_t offset) { 2786 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2787 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2788 assert( addr < _cm->finger(), "invariant"); 2789 assert(addr >= _task->finger(), "invariant"); 2790 2791 // We move that task's local finger along. 2792 _task->move_finger_to(addr); 2793 2794 _task->scan_object(oop(addr)); 2795 // we only partially drain the local queue and global stack 2796 _task->drain_local_queue(true); 2797 _task->drain_global_stack(true); 2798 2799 // if the has_aborted flag has been raised, we need to bail out of 2800 // the iteration 2801 return !_task->has_aborted(); 2802 } 2803 }; 2804 2805 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2806 ReferenceProcessor* result = NULL; 2807 if (G1UseConcMarkReferenceProcessing) { 2808 result = g1h->ref_processor_cm(); 2809 assert(result != NULL, "should not be NULL"); 2810 } 2811 return result; 2812 } 2813 2814 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2815 ConcurrentMark* cm, 2816 CMTask* task) 2817 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2818 _g1h(g1h), _cm(cm), _task(task) 2819 { } 2820 2821 void CMTask::setup_for_region(HeapRegion* hr) { 2822 assert(hr != NULL, 2823 "claim_region() should have filtered out NULL regions"); 2824 _curr_region = hr; 2825 _finger = hr->bottom(); 2826 update_region_limit(); 2827 } 2828 2829 void CMTask::update_region_limit() { 2830 HeapRegion* hr = _curr_region; 2831 HeapWord* bottom = hr->bottom(); 2832 HeapWord* limit = hr->next_top_at_mark_start(); 2833 2834 if (limit == bottom) { 2835 // The region was collected underneath our feet. 2836 // We set the finger to bottom to ensure that the bitmap 2837 // iteration that will follow this will not do anything. 2838 // (this is not a condition that holds when we set the region up, 2839 // as the region is not supposed to be empty in the first place) 2840 _finger = bottom; 2841 } else if (limit >= _region_limit) { 2842 assert(limit >= _finger, "peace of mind"); 2843 } else { 2844 assert(limit < _region_limit, "only way to get here"); 2845 // This can happen under some pretty unusual circumstances. An 2846 // evacuation pause empties the region underneath our feet (NTAMS 2847 // at bottom). We then do some allocation in the region (NTAMS 2848 // stays at bottom), followed by the region being used as a GC 2849 // alloc region (NTAMS will move to top() and the objects 2850 // originally below it will be grayed). All objects now marked in 2851 // the region are explicitly grayed, if below the global finger, 2852 // and we do not need in fact to scan anything else. So, we simply 2853 // set _finger to be limit to ensure that the bitmap iteration 2854 // doesn't do anything. 
2855 _finger = limit; 2856 } 2857 2858 _region_limit = limit; 2859 } 2860 2861 void CMTask::giveup_current_region() { 2862 assert(_curr_region != NULL, "invariant"); 2863 clear_region_fields(); 2864 } 2865 2866 void CMTask::clear_region_fields() { 2867 // Values for these three fields that indicate that we're not 2868 // holding on to a region. 2869 _curr_region = NULL; 2870 _finger = NULL; 2871 _region_limit = NULL; 2872 } 2873 2874 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 2875 if (cm_oop_closure == NULL) { 2876 assert(_cm_oop_closure != NULL, "invariant"); 2877 } else { 2878 assert(_cm_oop_closure == NULL, "invariant"); 2879 } 2880 _cm_oop_closure = cm_oop_closure; 2881 } 2882 2883 void CMTask::reset(CMBitMap* nextMarkBitMap) { 2884 guarantee(nextMarkBitMap != NULL, "invariant"); 2885 _nextMarkBitMap = nextMarkBitMap; 2886 clear_region_fields(); 2887 2888 _calls = 0; 2889 _elapsed_time_ms = 0.0; 2890 _termination_time_ms = 0.0; 2891 _termination_start_time_ms = 0.0; 2892 } 2893 2894 bool CMTask::should_exit_termination() { 2895 regular_clock_call(); 2896 // This is called when we are in the termination protocol. We should 2897 // quit if, for some reason, this task wants to abort or the global 2898 // stack is not empty (this means that we can get work from it). 2899 return !_cm->mark_stack_empty() || has_aborted(); 2900 } 2901 2902 void CMTask::reached_limit() { 2903 assert(_words_scanned >= _words_scanned_limit || 2904 _refs_reached >= _refs_reached_limit , 2905 "shouldn't have been called otherwise"); 2906 regular_clock_call(); 2907 } 2908 2909 void CMTask::regular_clock_call() { 2910 if (has_aborted()) return; 2911 2912 // First, we need to recalculate the words scanned and refs reached 2913 // limits for the next clock call. 2914 recalculate_limits(); 2915 2916 // During the regular clock call we do the following 2917 2918 // (1) If an overflow has been flagged, then we abort. 2919 if (_cm->has_overflown()) { 2920 set_has_aborted(); 2921 return; 2922 } 2923 2924 // If we are not concurrent (i.e. we're doing remark) we don't need 2925 // to check anything else. The other steps are only needed during 2926 // the concurrent marking phase. 2927 if (!concurrent()) return; 2928 2929 // (2) If marking has been aborted for Full GC, then we also abort. 2930 if (_cm->has_aborted()) { 2931 set_has_aborted(); 2932 return; 2933 } 2934 2935 double curr_time_ms = os::elapsedVTime() * 1000.0; 2936 2937 // (4) We check whether we should yield. If we have to, then we abort. 2938 if (SuspendibleThreadSet::should_yield()) { 2939 // We should yield. To do this we abort the task. The caller is 2940 // responsible for yielding. 2941 set_has_aborted(); 2942 return; 2943 } 2944 2945 // (5) We check whether we've reached our time quota. If we have, 2946 // then we abort. 2947 double elapsed_time_ms = curr_time_ms - _start_time_ms; 2948 if (elapsed_time_ms > _time_target_ms) { 2949 set_has_aborted(); 2950 _has_timed_out = true; 2951 return; 2952 } 2953 2954 // (6) Finally, we check whether there are enough completed STAB 2955 // buffers available for processing. If there are, we abort. 
2956 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2957 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2958 // we do need to process SATB buffers, we'll abort and restart 2959 // the marking task to do so 2960 set_has_aborted(); 2961 return; 2962 } 2963 } 2964 2965 void CMTask::recalculate_limits() { 2966 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2967 _words_scanned_limit = _real_words_scanned_limit; 2968 2969 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2970 _refs_reached_limit = _real_refs_reached_limit; 2971 } 2972 2973 void CMTask::decrease_limits() { 2974 // This is called when we believe that we're going to do an infrequent 2975 // operation which will increase the per byte scanned cost (i.e. move 2976 // entries to/from the global stack). It basically tries to decrease the 2977 // scanning limit so that the clock is called earlier. 2978 2979 _words_scanned_limit = _real_words_scanned_limit - 2980 3 * words_scanned_period / 4; 2981 _refs_reached_limit = _real_refs_reached_limit - 2982 3 * refs_reached_period / 4; 2983 } 2984 2985 void CMTask::move_entries_to_global_stack() { 2986 // local array where we'll store the entries that will be popped 2987 // from the local queue 2988 oop buffer[global_stack_transfer_size]; 2989 2990 int n = 0; 2991 oop obj; 2992 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 2993 buffer[n] = obj; 2994 ++n; 2995 } 2996 2997 if (n > 0) { 2998 // we popped at least one entry from the local queue 2999 3000 if (!_cm->mark_stack_push(buffer, n)) { 3001 set_has_aborted(); 3002 } 3003 } 3004 3005 // this operation was quite expensive, so decrease the limits 3006 decrease_limits(); 3007 } 3008 3009 void CMTask::get_entries_from_global_stack() { 3010 // local array where we'll store the entries that will be popped 3011 // from the global stack. 3012 oop buffer[global_stack_transfer_size]; 3013 int n; 3014 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3015 assert(n <= global_stack_transfer_size, 3016 "we should not pop more than the given limit"); 3017 if (n > 0) { 3018 // yes, we did actually pop at least one entry 3019 for (int i = 0; i < n; ++i) { 3020 bool success = _task_queue->push(buffer[i]); 3021 // We only call this when the local queue is empty or under a 3022 // given target limit. So, we do not expect this push to fail. 3023 assert(success, "invariant"); 3024 } 3025 } 3026 3027 // this operation was quite expensive, so decrease the limits 3028 decrease_limits(); 3029 } 3030 3031 void CMTask::drain_local_queue(bool partially) { 3032 if (has_aborted()) return; 3033 3034 // Decide what the target size is, depending whether we're going to 3035 // drain it partially (so that other tasks can steal if they run out 3036 // of things to do) or totally (at the very end). 
3037 size_t target_size; 3038 if (partially) { 3039 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3040 } else { 3041 target_size = 0; 3042 } 3043 3044 if (_task_queue->size() > target_size) { 3045 oop obj; 3046 bool ret = _task_queue->pop_local(obj); 3047 while (ret) { 3048 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 3049 assert(!_g1h->is_on_master_free_list( 3050 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3051 3052 scan_object(obj); 3053 3054 if (_task_queue->size() <= target_size || has_aborted()) { 3055 ret = false; 3056 } else { 3057 ret = _task_queue->pop_local(obj); 3058 } 3059 } 3060 } 3061 } 3062 3063 void CMTask::drain_global_stack(bool partially) { 3064 if (has_aborted()) return; 3065 3066 // We have a policy to drain the local queue before we attempt to 3067 // drain the global stack. 3068 assert(partially || _task_queue->size() == 0, "invariant"); 3069 3070 // Decide what the target size is, depending whether we're going to 3071 // drain it partially (so that other tasks can steal if they run out 3072 // of things to do) or totally (at the very end). Notice that, 3073 // because we move entries from the global stack in chunks or 3074 // because another task might be doing the same, we might in fact 3075 // drop below the target. But, this is not a problem. 3076 size_t target_size; 3077 if (partially) { 3078 target_size = _cm->partial_mark_stack_size_target(); 3079 } else { 3080 target_size = 0; 3081 } 3082 3083 if (_cm->mark_stack_size() > target_size) { 3084 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3085 get_entries_from_global_stack(); 3086 drain_local_queue(partially); 3087 } 3088 } 3089 } 3090 3091 // SATB Queue has several assumptions on whether to call the par or 3092 // non-par versions of the methods. this is why some of the code is 3093 // replicated. We should really get rid of the single-threaded version 3094 // of the code to simplify things. 3095 void CMTask::drain_satb_buffers() { 3096 if (has_aborted()) return; 3097 3098 // We set this so that the regular clock knows that we're in the 3099 // middle of draining buffers and doesn't set the abort flag when it 3100 // notices that SATB buffers are available for draining. It'd be 3101 // very counter productive if it did that. :-) 3102 _draining_satb_buffers = true; 3103 3104 CMSATBBufferClosure satb_cl(this, _g1h); 3105 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3106 3107 // This keeps claiming and applying the closure to completed buffers 3108 // until we run out of buffers or we need to abort. 
3109 while (!has_aborted() && 3110 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 3111 regular_clock_call(); 3112 } 3113 3114 _draining_satb_buffers = false; 3115 3116 assert(has_aborted() || 3117 concurrent() || 3118 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3119 3120 // again, this was a potentially expensive operation, decrease the 3121 // limits to get the regular clock call early 3122 decrease_limits(); 3123 } 3124 3125 void CMTask::print_stats() { 3126 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 3127 _worker_id, _calls); 3128 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3129 _elapsed_time_ms, _termination_time_ms); 3130 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3131 _step_times_ms.num(), _step_times_ms.avg(), 3132 _step_times_ms.sd()); 3133 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 3134 _step_times_ms.maximum(), _step_times_ms.sum()); 3135 } 3136 3137 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) { 3138 return _task_queues->steal(worker_id, hash_seed, obj); 3139 } 3140 3141 /***************************************************************************** 3142 3143 The do_marking_step(time_target_ms, ...) method is the building 3144 block of the parallel marking framework. It can be called in parallel 3145 with other invocations of do_marking_step() on different tasks 3146 (but only one per task, obviously) and concurrently with the 3147 mutator threads, or during remark, hence it eliminates the need 3148 for two versions of the code. When called during remark, it will 3149 pick up from where the task left off during the concurrent marking 3150 phase. Interestingly, tasks are also claimable during evacuation 3151 pauses too, since do_marking_step() ensures that it aborts before 3152 it needs to yield. 3153 3154 The data structures that it uses to do marking work are the 3155 following: 3156 3157 (1) Marking Bitmap. If there are gray objects that appear only 3158 on the bitmap (this happens either when dealing with an overflow 3159 or when the initial marking phase has simply marked the roots 3160 and didn't push them on the stack), then tasks claim heap 3161 regions whose bitmap they then scan to find gray objects. A 3162 global finger indicates where the end of the last claimed region 3163 is. A local finger indicates how far into the region a task has 3164 scanned. The two fingers are used to determine how to gray an 3165 object (i.e. whether simply marking it is OK, as it will be 3166 visited by a task in the future, or whether it needs to be also 3167 pushed on a stack). 3168 3169 (2) Local Queue. The local queue of the task which is accessed 3170 reasonably efficiently by the task. Other tasks can steal from 3171 it when they run out of work. Throughout the marking phase, a 3172 task attempts to keep its local queue short but not totally 3173 empty, so that entries are available for stealing by other 3174 tasks. Only when there is no more work, a task will totally 3175 drain its local queue. 3176 3177 (3) Global Mark Stack. This handles local queue overflow. During 3178 marking only sets of entries are moved between it and the local 3179 queues, as access to it requires a mutex and more fine-grain 3180 interaction with it which might cause contention. If it 3181 overflows, then the marking phase should restart and iterate 3182 over the bitmap to identify gray objects. 
3183     phase, tasks attempt to keep the global mark stack at a small
3184     length but not totally empty, so that entries are available for
3185     popping by other tasks. Only when there is no more work do tasks
3186     totally drain the global mark stack.
3187
3188       (4) SATB Buffer Queue. This is where completed SATB buffers are
3189       made available. Buffers are regularly removed from this queue
3190       and scanned for roots, so that the queue doesn't get too
3191       long. During remark, all completed buffers are processed, as
3192       well as the filled-in parts of any uncompleted buffers.
3193
3194     The do_marking_step() method tries to abort when the time target
3195     has been reached. There are a few other cases when the
3196     do_marking_step() method also aborts:
3197
3198       (1) When the marking phase has been aborted (after a Full GC).
3199
3200       (2) When a global overflow (on the global stack) has been
3201       triggered. Before the task aborts, it will actually sync up with
3202       the other tasks to ensure that all the marking data structures
3203       (local queues, stacks, fingers etc.) are re-initialized so that
3204       when do_marking_step() completes, the marking phase can
3205       immediately restart.
3206
3207       (3) When enough completed SATB buffers are available. The
3208       do_marking_step() method only tries to drain SATB buffers right
3209       at the beginning. So, if enough buffers are available, the
3210       marking step aborts and the SATB buffers are processed at
3211       the beginning of the next invocation.
3212
3213       (4) To yield. When we have to yield, we abort and do the yield
3214       right at the end of do_marking_step(). This saves us a lot
3215       of hassle as, by yielding, we might allow a Full GC. If this
3216       happens then objects will be compacted underneath our feet, the
3217       heap might shrink, etc. We avoid having to check for this by just
3218       aborting and doing the yield right at the end.
3219
3220     From the above it follows that the do_marking_step() method should
3221     be called in a loop (or, otherwise, regularly) until it completes.
3222
3223     If a marking step completes without its has_aborted() flag being
3224     true, it means it has completed the current marking phase (and
3225     also all other marking tasks have done so and have all synced up).
3226
3227     A method called regular_clock_call() is invoked "regularly" (at
3228     sub-millisecond intervals) throughout marking. It is this clock method
3229     that checks all the abort conditions which were mentioned above and
3230     decides when the task should abort. A work-based scheme is used to
3231     trigger this clock method: it is called whenever the number of object
3232     words the marking phase has scanned or the number of references the
3233     marking phase has visited reaches a given limit. Additional calls to
3234     the clock method have been planted in a few other strategic places
3235     too. The initial reason for the clock method was to avoid calling
3236     vtime too frequently, as it is quite expensive. So, once it was in
3237     place, it was natural to piggy-back all the other conditions on it
3238     too and not constantly check them throughout the code.
3239
3240     If do_termination is true then do_marking_step will enter its
3241     termination protocol.
3242
3243     The value of is_serial must be true when do_marking_step is being
3244     called serially (i.e. by the VMThread) and do_marking_step should
3245     skip any synchronization in the termination and overflow code.
3246     Examples include the serial remark code and the serial reference
3247     processing closures.
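    As an illustration only (a hedged sketch of a typical caller, not a
    copy of the actual driver code; "task" and "cm" stand for the worker's
    CMTask and the ConcurrentMark instance), a worker thread in a work gang
    is expected to drive this method in a retry loop along these lines:

      do {
        task->do_marking_step(G1ConcMarkStepDurationMillis,
                              true,    // do_termination
                              false);  // is_serial
        // ... yield to the suspendible thread set, update vtime stats ...
      } while (!cm->has_aborted() && task->has_aborted());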
3248
3249     The value of is_serial must be false when do_marking_step is
3250     being called by any of the worker threads in a work gang.
3251     Examples include the concurrent marking code (CMMarkingTask),
3252     the MT remark code, and the MT reference processing closures.
3253
3254 *****************************************************************************/
3255
3256 void CMTask::do_marking_step(double time_target_ms,
3257                              bool do_termination,
3258                              bool is_serial) {
3259   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3260   assert(concurrent() == _cm->concurrent(), "they should be the same");
3261
3262   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3263   assert(_task_queues != NULL, "invariant");
3264   assert(_task_queue != NULL, "invariant");
3265   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3266
3267   assert(!_claimed,
3268          "only one thread should claim this task at any one time");
3269
3270   // OK, this doesn't safeguard against all possible scenarios, as it is
3271   // possible for two threads to set the _claimed flag at the same
3272   // time. But it is only for debugging purposes anyway and it will
3273   // catch most problems.
3274   _claimed = true;
3275
3276   _start_time_ms = os::elapsedVTime() * 1000.0;
3277
3278   // If do_stealing is true then do_marking_step will attempt to
3279   // steal work from the other CMTasks. It only makes sense to
3280   // enable stealing when the termination protocol is enabled
3281   // and do_marking_step() is not being called serially.
3282   bool do_stealing = do_termination && !is_serial;
3283
3284   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3285   _time_target_ms = time_target_ms - diff_prediction_ms;
3286
3287   // set up the variables that are used in the work-based scheme to
3288   // call the regular clock method
3289   _words_scanned = 0;
3290   _refs_reached = 0;
3291   recalculate_limits();
3292
3293   // clear all flags
3294   clear_has_aborted();
3295   _has_timed_out = false;
3296   _draining_satb_buffers = false;
3297
3298   ++_calls;
3299
3300   // Set up the bitmap and oop closures. Anything that uses them is
3301   // eventually called from this method, so it is OK to allocate these
3302   // statically.
3303   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3304   G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
3305   set_cm_oop_closure(&cm_oop_closure);
3306
3307   if (_cm->has_overflown()) {
3308     // This can happen if the mark stack overflows during a GC pause
3309     // and this task, after a yield point, restarts. We have to abort
3310     // as we need to get into the overflow protocol which happens
3311     // right at the end of this task.
3312     set_has_aborted();
3313   }
3314
3315   // First drain any available SATB buffers. After this, we will not
3316   // look at SATB buffers before the next invocation of this method.
3317   // If enough completed SATB buffers are queued up, the regular clock
3318   // will abort this task so that it restarts.
3319   drain_satb_buffers();
3320   // ...then partially drain the local queue and the global stack
3321   drain_local_queue(true);
3322   drain_global_stack(true);
3323
3324   do {
3325     if (!has_aborted() && _curr_region != NULL) {
3326       // This means that we're already holding on to a region.
3327 assert(_finger != NULL, "if region is not NULL, then the finger " 3328 "should not be NULL either"); 3329 3330 // We might have restarted this task after an evacuation pause 3331 // which might have evacuated the region we're holding on to 3332 // underneath our feet. Let's read its limit again to make sure 3333 // that we do not iterate over a region of the heap that 3334 // contains garbage (update_region_limit() will also move 3335 // _finger to the start of the region if it is found empty). 3336 update_region_limit(); 3337 // We will start from _finger not from the start of the region, 3338 // as we might be restarting this task after aborting half-way 3339 // through scanning this region. In this case, _finger points to 3340 // the address where we last found a marked object. If this is a 3341 // fresh region, _finger points to start(). 3342 MemRegion mr = MemRegion(_finger, _region_limit); 3343 3344 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 3345 "humongous regions should go around loop once only"); 3346 3347 // Some special cases: 3348 // If the memory region is empty, we can just give up the region. 3349 // If the current region is humongous then we only need to check 3350 // the bitmap for the bit associated with the start of the object, 3351 // scan the object if it's live, and give up the region. 3352 // Otherwise, let's iterate over the bitmap of the part of the region 3353 // that is left. 3354 // If the iteration is successful, give up the region. 3355 if (mr.is_empty()) { 3356 giveup_current_region(); 3357 regular_clock_call(); 3358 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 3359 if (_nextMarkBitMap->isMarked(mr.start())) { 3360 // The object is marked - apply the closure 3361 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 3362 bitmap_closure.do_bit(offset); 3363 } 3364 // Even if this task aborted while scanning the humongous object 3365 // we can (and should) give up the current region. 3366 giveup_current_region(); 3367 regular_clock_call(); 3368 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 3369 giveup_current_region(); 3370 regular_clock_call(); 3371 } else { 3372 assert(has_aborted(), "currently the only way to do so"); 3373 // The only way to abort the bitmap iteration is to return 3374 // false from the do_bit() method. However, inside the 3375 // do_bit() method we move the _finger to point to the 3376 // object currently being looked at. So, if we bail out, we 3377 // have definitely set _finger to something non-null. 3378 assert(_finger != NULL, "invariant"); 3379 3380 // Region iteration was actually aborted. So now _finger 3381 // points to the address of the object we last scanned. If we 3382 // leave it there, when we restart this task, we will rescan 3383 // the object. It is easy to avoid this. We move the finger by 3384 // enough to point to the next possible object header (the 3385 // bitmap knows by how much we need to move it as it knows its 3386 // granularity). 3387 assert(_finger < _region_limit, "invariant"); 3388 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 3389 // Check if bitmap iteration was aborted while scanning the last object 3390 if (new_finger >= _region_limit) { 3391 giveup_current_region(); 3392 } else { 3393 move_finger_to(new_finger); 3394 } 3395 } 3396 } 3397 // At this point we have either completed iterating over the 3398 // region we were holding on to, or we have aborted. 
3399
3400     // We then partially drain the local queue and the global stack.
3401     // (Do we really need this?)
3402     drain_local_queue(true);
3403     drain_global_stack(true);
3404
3405     // Read the note on the claim_region() method on why it might
3406     // return NULL with potentially more regions available for
3407     // claiming and why we have to check out_of_regions() to determine
3408     // whether we're done or not.
3409     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3410       // We are going to try to claim a new region. We should have
3411       // given up on the previous one.
3412       // Separated the asserts so that we know which one fires.
3413       assert(_curr_region == NULL, "invariant");
3414       assert(_finger == NULL, "invariant");
3415       assert(_region_limit == NULL, "invariant");
3416       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3417       if (claimed_region != NULL) {
3418         // Yes, we managed to claim one
3419         setup_for_region(claimed_region);
3420         assert(_curr_region == claimed_region, "invariant");
3421       }
3422       // It is important to call the regular clock here. It might take
3423       // a while to claim a region if, for example, we hit a large
3424       // block of empty regions. So we need to call the regular clock
3425       // method once round the loop to make sure it's called
3426       // frequently enough.
3427       regular_clock_call();
3428     }
3429
3430     if (!has_aborted() && _curr_region == NULL) {
3431       assert(_cm->out_of_regions(),
3432              "at this point we should be out of regions");
3433     }
3434   } while (_curr_region != NULL && !has_aborted());
3435
3436   if (!has_aborted()) {
3437     // We cannot check whether the global stack is empty, since other
3438     // tasks might be pushing objects to it concurrently.
3439     assert(_cm->out_of_regions(),
3440            "at this point we should be out of regions");
3441     // Try to reduce the number of available SATB buffers so that
3442     // remark has less work to do.
3443     drain_satb_buffers();
3444   }
3445
3446   // Since we've done everything else, we can now totally drain the
3447   // local queue and global stack.
3448   drain_local_queue(false);
3449   drain_global_stack(false);
3450
3451   // Attempt at work stealing from other tasks' queues.
3452   if (do_stealing && !has_aborted()) {
3453     // We have not aborted. This means that we have finished all that
3454     // we could. Let's try to do some stealing...
3455
3456     // We cannot check whether the global stack is empty, since other
3457     // tasks might be pushing objects to it concurrently.
3458     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3459            "only way to reach here");
3460     while (!has_aborted()) {
3461       oop obj;
3462       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3463         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3464                "any stolen object should be marked");
3465         scan_object(obj);
3466
3467         // And since we're towards the end, let's totally drain the
3468         // local queue and global stack.
3469         drain_local_queue(false);
3470         drain_global_stack(false);
3471       } else {
3472         break;
3473       }
3474     }
3475   }
3476
3477   // If we are about to wrap up and go into termination, check if we
3478   // should raise the overflow flag.
3479   if (do_termination && !has_aborted()) {
3480     if (_cm->force_overflow()->should_force()) {
3481       _cm->set_has_overflown();
3482       regular_clock_call();
3483     }
3484   }
3485
3486   // We still haven't aborted. Now, let's try to get into the
3487   // termination protocol.
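  // Roughly speaking, offer_termination() only returns true once every task
  // in the gang has offered termination, i.e. all of them have run out of
  // regions and queue entries at the same time; if should_exit_termination()
  // notices more work in the meantime, the task leaves the protocol, aborts
  // below and is restarted by its caller.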
3488   if (do_termination && !has_aborted()) {
3489     // We cannot check whether the global stack is empty, since other
3490     // tasks might be concurrently pushing objects on it.
3491     // Separated the asserts so that we know which one fires.
3492     assert(_cm->out_of_regions(), "only way to reach here");
3493     assert(_task_queue->size() == 0, "only way to reach here");
3494     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3495
3496     // The CMTask class also extends the TerminatorTerminator class,
3497     // hence its should_exit_termination() method will also decide
3498     // whether to exit the termination protocol or not.
3499     bool finished = (is_serial ||
3500                      _cm->terminator()->offer_termination(this));
3501     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3502     _termination_time_ms +=
3503       termination_end_time_ms - _termination_start_time_ms;
3504
3505     if (finished) {
3506       // We're all done.
3507
3508       if (_worker_id == 0) {
3509         // let's allow task 0 to do this
3510         if (concurrent()) {
3511           assert(_cm->concurrent_marking_in_progress(), "invariant");
3512           // we need to set this to false before the next
3513           // safepoint. This way we ensure that the marking phase
3514           // doesn't observe any more heap expansions.
3515           _cm->clear_concurrent_marking_in_progress();
3516         }
3517       }
3518
3519       // We can now guarantee that the global stack is empty, since
3520       // all other tasks have finished. We separated the guarantees so
3521       // that, if a condition is false, we can immediately find out
3522       // which one.
3523       guarantee(_cm->out_of_regions(), "only way to reach here");
3524       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3525       guarantee(_task_queue->size() == 0, "only way to reach here");
3526       guarantee(!_cm->has_overflown(), "only way to reach here");
3527       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3528     } else {
3529       // Apparently there's more work to do. Let's abort this task. Its
3530       // caller will restart it and we can hopefully find more things to do.
3531       set_has_aborted();
3532     }
3533   }
3534
3535   // This is mainly for debugging purposes, to make sure that a pointer to
3536   // the closure which was statically allocated in this frame doesn't
3537   // escape it by accident.
3538   set_cm_oop_closure(NULL);
3539   double end_time_ms = os::elapsedVTime() * 1000.0;
3540   double elapsed_time_ms = end_time_ms - _start_time_ms;
3541   // Update the step history.
3542   _step_times_ms.add(elapsed_time_ms);
3543
3544   if (has_aborted()) {
3545     // The task was aborted for some reason.
3546     if (_has_timed_out) {
3547       double diff_ms = elapsed_time_ms - _time_target_ms;
3548       // Keep statistics of how well we did with respect to hitting
3549       // our target only if we actually timed out (if we aborted for
3550       // other reasons, then the results might get skewed).
3551       _marking_step_diffs_ms.add(diff_ms);
3552     }
3553
3554     if (_cm->has_overflown()) {
3555       // This is the interesting one. We aborted because a global
3556       // overflow was raised. This means we have to restart the
3557       // marking phase and start iterating over regions. However, in
3558       // order to do this we have to make sure that all tasks stop
3559       // what they are doing and re-initialize in a safe manner. We
3560       // will achieve this with the use of two barrier sync points.
3561
3562       if (!is_serial) {
3563         // We only need to enter the sync barrier if being called
3564         // from a parallel context.
3565         _cm->enter_first_sync_barrier(_worker_id);
3566
3567         // When we exit this sync barrier we know that all tasks have
3568         // stopped doing marking work. So, it's now safe to
3569         // re-initialize our data structures. At the end of this method,
3570         // task 0 will clear the global data structures.
3571       }
3572
3573       // We clear the local state of this task...
3574       clear_region_fields();
3575
3576       if (!is_serial) {
3577         // ...and enter the second barrier.
3578         _cm->enter_second_sync_barrier(_worker_id);
3579       }
3580       // At this point, if we're in the concurrent phase of
3581       // marking, everything has been re-initialized and we're
3582       // ready to restart.
3583     }
3584   }
3585
3586   _claimed = false;
3587 }
3588
3589 CMTask::CMTask(uint worker_id,
3590                ConcurrentMark* cm,
3591                size_t* marked_bytes,
3592                BitMap* card_bm,
3593                CMTaskQueue* task_queue,
3594                CMTaskQueueSet* task_queues)
3595   : _g1h(G1CollectedHeap::heap()),
3596     _worker_id(worker_id), _cm(cm),
3597     _claimed(false),
3598     _nextMarkBitMap(NULL), _hash_seed(17),
3599     _task_queue(task_queue),
3600     _task_queues(task_queues),
3601     _cm_oop_closure(NULL),
3602     _marked_bytes_array(marked_bytes),
3603     _card_bm(card_bm) {
3604   guarantee(task_queue != NULL, "invariant");
3605   guarantee(task_queues != NULL, "invariant");
3606
3607   _marking_step_diffs_ms.add(0.5);
3608 }
3609
3610 // These are formatting macros that are used below to ensure
3611 // consistent formatting. The *_H_* versions are used to format the
3612 // header for a particular value and they should be kept consistent
3613 // with the corresponding macro. Also note that most of the macros add
3614 // the necessary white space (as a prefix) which makes them a bit
3615 // easier to compose.
3616
3617 // All the output lines are prefixed with this string to be able to
3618 // identify them easily in a large log file.
3619 #define G1PPRL_LINE_PREFIX "###"
3620
3621 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
3622 #ifdef _LP64
3623 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
3624 #else // _LP64
3625 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
3626 #endif // _LP64
3627
3628 // For per-region info
3629 #define G1PPRL_TYPE_FORMAT " %-4s"
3630 #define G1PPRL_TYPE_H_FORMAT " %4s"
3631 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9)
3632 #define G1PPRL_BYTE_H_FORMAT " %9s"
3633 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
3634 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
3635
3636 // For summary info
3637 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
3638 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT
3639 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
3640 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3641
3642 G1PrintRegionLivenessInfoClosure::
3643 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3644   : _total_used_bytes(0), _total_capacity_bytes(0),
3645     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3646     _hum_used_bytes(0), _hum_capacity_bytes(0),
3647     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3648     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3649   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3650   MemRegion g1_reserved = g1h->g1_reserved();
3651   double now = os::elapsedTime();
3652
3653   // Print the header of the output.
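  // As a rough illustration of how the macros compose (assuming _LP64), the
  // column-header line below expands to a format string along the lines of
  // "### %4s %37s %9s %9s %9s %14s %9s %9s".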
3654   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3655   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3656                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3657                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3658                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3659                           HeapRegion::GrainBytes);
3660   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3661   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3662                           G1PPRL_TYPE_H_FORMAT
3663                           G1PPRL_ADDR_BASE_H_FORMAT
3664                           G1PPRL_BYTE_H_FORMAT
3665                           G1PPRL_BYTE_H_FORMAT
3666                           G1PPRL_BYTE_H_FORMAT
3667                           G1PPRL_DOUBLE_H_FORMAT
3668                           G1PPRL_BYTE_H_FORMAT
3669                           G1PPRL_BYTE_H_FORMAT,
3670                           "type", "address-range",
3671                           "used", "prev-live", "next-live", "gc-eff",
3672                           "remset", "code-roots");
3673   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3674                           G1PPRL_TYPE_H_FORMAT
3675                           G1PPRL_ADDR_BASE_H_FORMAT
3676                           G1PPRL_BYTE_H_FORMAT
3677                           G1PPRL_BYTE_H_FORMAT
3678                           G1PPRL_BYTE_H_FORMAT
3679                           G1PPRL_DOUBLE_H_FORMAT
3680                           G1PPRL_BYTE_H_FORMAT
3681                           G1PPRL_BYTE_H_FORMAT,
3682                           "", "",
3683                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3684                           "(bytes)", "(bytes)");
3685 }
3686
3687 // It takes as a parameter a reference to one of the _hum_* fields,
3688 // deduces the corresponding value for a region in a humongous region
3689 // series (either the region size, or what's left if the _hum_* field
3690 // is < the region size), and updates the _hum_* field accordingly.
3691 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3692   size_t bytes = 0;
3693   // The > 0 check is to deal with the prev and next live bytes which
3694   // could be 0.
3695   if (*hum_bytes > 0) {
3696     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3697     *hum_bytes -= bytes;
3698   }
3699   return bytes;
3700 }
3701
3702 // It deduces the values for a region in a humongous region series
3703 // from the _hum_* fields and updates those accordingly. It assumes
3704 // that the _hum_* fields have already been set up from the "starts
3705 // humongous" region and that we visit the regions in address order.
3706 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3707                                                      size_t* capacity_bytes,
3708                                                      size_t* prev_live_bytes,
3709                                                      size_t* next_live_bytes) {
3710   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3711   *used_bytes = get_hum_bytes(&_hum_used_bytes);
3712   *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
3713   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3714   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3715 }
3716
3717 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3718   const char* type = r->get_type_str();
3719   HeapWord* bottom = r->bottom();
3720   HeapWord* end = r->end();
3721   size_t capacity_bytes = r->capacity();
3722   size_t used_bytes = r->used();
3723   size_t prev_live_bytes = r->live_bytes();
3724   size_t next_live_bytes = r->next_live_bytes();
3725   double gc_eff = r->gc_efficiency();
3726   size_t remset_bytes = r->rem_set()->mem_size();
3727   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3728
3729   if (r->is_starts_humongous()) {
3730     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3731            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3732            "they should have been zeroed after the last time we used them");
3733     // Set up the _hum_* fields.
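    // For example (illustrative numbers): the "starts humongous" region
    // stashes the totals for the whole series here once, and get_hum_bytes()
    // then hands back at most one region's worth (GrainBytes) per visited
    // region until the stashed total is used up, e.g. 1MB, 1MB and 0.5MB of
    // used space for an object occupying two and a half 1MB regions.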
3734 _hum_capacity_bytes = capacity_bytes; 3735 _hum_used_bytes = used_bytes; 3736 _hum_prev_live_bytes = prev_live_bytes; 3737 _hum_next_live_bytes = next_live_bytes; 3738 get_hum_bytes(&used_bytes, &capacity_bytes, 3739 &prev_live_bytes, &next_live_bytes); 3740 end = bottom + HeapRegion::GrainWords; 3741 } else if (r->is_continues_humongous()) { 3742 get_hum_bytes(&used_bytes, &capacity_bytes, 3743 &prev_live_bytes, &next_live_bytes); 3744 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 3745 } 3746 3747 _total_used_bytes += used_bytes; 3748 _total_capacity_bytes += capacity_bytes; 3749 _total_prev_live_bytes += prev_live_bytes; 3750 _total_next_live_bytes += next_live_bytes; 3751 _total_remset_bytes += remset_bytes; 3752 _total_strong_code_roots_bytes += strong_code_roots_bytes; 3753 3754 // Print a line for this particular region. 3755 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3756 G1PPRL_TYPE_FORMAT 3757 G1PPRL_ADDR_BASE_FORMAT 3758 G1PPRL_BYTE_FORMAT 3759 G1PPRL_BYTE_FORMAT 3760 G1PPRL_BYTE_FORMAT 3761 G1PPRL_DOUBLE_FORMAT 3762 G1PPRL_BYTE_FORMAT 3763 G1PPRL_BYTE_FORMAT, 3764 type, p2i(bottom), p2i(end), 3765 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 3766 remset_bytes, strong_code_roots_bytes); 3767 3768 return false; 3769 } 3770 3771 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 3772 // add static memory usages to remembered set sizes 3773 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3774 // Print the footer of the output. 3775 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3776 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3777 " SUMMARY" 3778 G1PPRL_SUM_MB_FORMAT("capacity") 3779 G1PPRL_SUM_MB_PERC_FORMAT("used") 3780 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3781 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3782 G1PPRL_SUM_MB_FORMAT("remset") 3783 G1PPRL_SUM_MB_FORMAT("code-roots"), 3784 bytes_to_mb(_total_capacity_bytes), 3785 bytes_to_mb(_total_used_bytes), 3786 perc(_total_used_bytes, _total_capacity_bytes), 3787 bytes_to_mb(_total_prev_live_bytes), 3788 perc(_total_prev_live_bytes, _total_capacity_bytes), 3789 bytes_to_mb(_total_next_live_bytes), 3790 perc(_total_next_live_bytes, _total_capacity_bytes), 3791 bytes_to_mb(_total_remset_bytes), 3792 bytes_to_mb(_total_strong_code_roots_bytes)); 3793 }
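
// A hedged usage sketch (illustrative, not a definition from this file): the
// liveness dump above is meant to be driven by iterating this closure over
// every region, roughly as follows.
//
//   if (log_is_enabled(Trace, gc, liveness)) {
//     G1PrintRegionLivenessInfoClosure cl("Post-Marking");  // phase name is illustrative
//     G1CollectedHeap::heap()->heap_region_iterate(&cl);    // one line per region
//   }                                                       // summary printed by ~cl()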