/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ErgoVerbose.hpp"
#include "gc/g1/g1Log.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize  = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
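  // (The rounding granularity, HeapWordSize << _shifter, is the number of
  // heap bytes covered by a single bitmap bit, so the rounded-up address is
  // the first one the bitmap can actually represent.)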
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}
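
// A worked sizing example (the exact figures depend on build-time constants):
// with MinObjAlignmentInBytes == 8 and BitsPerByte == 8, mark_distance() is
// 64, i.e. one bitmap bit covers one possible object start (8 heap bytes on a
// 64-bit build) and compute_size() reserves heap_size / 64 bytes -- roughly
// 16 MB for a 1 GB heap, for each of the two marking bitmaps.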

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer     _hrclaimer;
  bool                  _suspendible; // If the task is suspendible, workers must join the STS.

public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}
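
// A sketch of the global mark stack's life cycle as implemented below (not a
// complete protocol description): allocate() reserves the backing store once,
// par_push_arr() raises _overflow instead of growing when the stack is full,
// an overflow eventually forces marking to restart, and expand() -- invoked
// at the remark pause when set_should_expand() noticed the overflow --
// doubles the capacity, up to MarkStackSizeMax, before the retry.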

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}
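
// Both bulk-transfer operations, par_push_arr() above and par_pop_arr()
// below, serialize on ParGCRareEvent_lock; as the lock's name suggests they
// are expected to stay off the fast path -- tasks normally work out of their
// own local queues and only spill to, or refill from, the global stack in
// batches.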

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false),  _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}
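
// claim_next() above deliberately pairs a racy, lock-free read of
// _next_survivor (so callers can bail out cheaply once the list is
// exhausted) with a second read under RootRegionScan_lock before the cursor
// is advanced -- a double-checked pattern; the re-read under the lock is
// what makes concurrent claimers safe.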

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
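
  // The two bitmaps alternate roles from cycle to cycle: "next" is the one
  // being built by the in-progress marking, while "prev" holds the result of
  // the last completed marking; they swap once a cycle finishes (the swap
  // itself happens elsewhere in ConcurrentMark).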

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
                                              (double) os::processor_count();
    double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor             = sleep_factor;
    _marking_task_overhead    = marking_task_overhead;
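
    // Illustrative arithmetic (hypothetical flag values, not defaults): with
    // G1MarkingOverheadPercent=10, MaxGCPauseMillis=200,
    // GCPauseIntervalMillis=1000 and 8 processors, overall_cm_overhead is
    // 200 * 0.10 / 1000 = 0.02, marking_thread_num = ceil(0.02 * 8) = 1,
    // marking_task_overhead = 0.02 / 1 * 8 = 0.16 and sleep_factor =
    // (1 - 0.16) / 0.16 = 5.25, i.e. the single marking thread sleeps for
    // 5.25x the time it just spent marking.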
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
                   (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
  gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
  gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
  gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

  _parallel_workers = new WorkGang("G1 Marker",
       _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // We can use the region's orig_end() for the limit and the comparison value
    // as it always contains the "real" end of the region that never changes and
    // has no side effects.
    // Due to the latter, there can also be no problem with the compiler generating
    // reloads of the orig_end() call.
    HeapWord* end = r->orig_end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->is_continues_humongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended by a Full GC or for an evacuation
 * pause to occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp();
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};
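
// In work() above, each do_marking_step() call is a time slice of roughly
// G1ConcMarkStepDurationMillis. When a step aborts without the whole cycle
// aborting (typically because the slice's time budget ran out), the worker
// sleeps for elapsed_vtime * sleep_factor before the next slice; since
// sleep_factor is zero unless G1MarkingOverheadPercent is in effect, there
// is normally no sleeping at all.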

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
                                                      1, /* Minimum workers */
                                                      parallel_marking_threads(),
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  double scan_start = os::elapsedTime();

  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
    }

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
    }
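
    // Note: an evacuation pause must not start while root regions are still
    // being scanned (callers of root_regions()->wait_until_scan_finished()
    // block on RootRegionScan_lock), so completion of the scan is always
    // announced via scan_finished() below, even when marking has aborted.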

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// Helper class to get rid of some boilerplate code.
class G1CMTraceTime : public StackObj {
  GCTraceTimeImpl _gc_trace_time;
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

 public:
  G1CMTraceTime(const char* title, bool doit)
    : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
  }
};

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops.  We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      G1CMTraceTime trace("GC aggregate-data", G1Log::finer());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->is_continues_humongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    if (!hr->is_starts_humongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};
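
// A sizing sketch for the two counting bitmaps (assuming the usual 512-byte
// cards): _region_bm holds one bit per heap region, while _card_bm holds one
// bit per card, so a 1 GB heap needs 1 GB / 512 = 2M bits, i.e. 256 KB, of
// card bitmap.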

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        failures += 1;
      }
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;

  HeapRegionClaimer _hrclaimer;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets, in the region liveness
// bitmap, the bit for each region containing live data.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
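
    // Objects allocated after marking started (i.e. at or above ntams) are
    // implicitly live under the SATB invariant, so the card bits for
    // [ntams, top) are set below without consulting the mark bitmap.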
1592 set_bit_for_region(hr); 1593 1594 // Now set the bits in the card bitmap for [ntams, top) 1595 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1596 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1597 1598 // Note: if we're looking at the last region in the heap, top 1599 // could actually be just beyond the end of the heap; end_idx 1600 // will then correspond to a (non-existent) card that is also 1601 // just beyond the heap. 1602 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1603 // end of object is not card aligned - increment to cover 1604 // all the cards spanned by the object 1605 end_idx += 1; 1606 } 1607 1608 assert(end_idx <= _card_bm->size(), 1609 "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1610 end_idx, _card_bm->size()); 1611 assert(start_idx < _card_bm->size(), 1612 "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1613 start_idx, _card_bm->size()); 1614 1615 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1616 } 1617 1618 // Set the bit for the region if it contains live data 1619 if (hr->next_marked_bytes() > 0) { 1620 set_bit_for_region(hr); 1621 } 1622 1623 return false; 1624 } 1625 }; 1626 1627 class G1ParFinalCountTask: public AbstractGangTask { 1628 protected: 1629 G1CollectedHeap* _g1h; 1630 ConcurrentMark* _cm; 1631 BitMap* _actual_region_bm; 1632 BitMap* _actual_card_bm; 1633 1634 uint _n_workers; 1635 HeapRegionClaimer _hrclaimer; 1636 1637 public: 1638 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1639 : AbstractGangTask("G1 final counting"), 1640 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1641 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1642 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1643 } 1644 1645 void work(uint worker_id) { 1646 assert(worker_id < _n_workers, "invariant"); 1647 1648 FinalCountDataUpdateClosure final_update_cl(_g1h, 1649 _actual_region_bm, 1650 _actual_card_bm); 1651 1652 _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer); 1653 } 1654 }; 1655 1656 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1657 G1CollectedHeap* _g1; 1658 size_t _freed_bytes; 1659 FreeRegionList* _local_cleanup_list; 1660 HeapRegionSetCount _old_regions_removed; 1661 HeapRegionSetCount _humongous_regions_removed; 1662 HRRSCleanupTask* _hrrs_cleanup_task; 1663 1664 public: 1665 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1666 FreeRegionList* local_cleanup_list, 1667 HRRSCleanupTask* hrrs_cleanup_task) : 1668 _g1(g1), 1669 _freed_bytes(0), 1670 _local_cleanup_list(local_cleanup_list), 1671 _old_regions_removed(), 1672 _humongous_regions_removed(), 1673 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1674 1675 size_t freed_bytes() { return _freed_bytes; } 1676 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1677 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1678 1679 bool doHeapRegion(HeapRegion *hr) { 1680 if (hr->is_continues_humongous() || hr->is_archive()) { 1681 return false; 1682 } 1683 // Regions are claimed through the HeapRegionClaimer in the parallel 1684 // iteration, so each region is processed by exactly one worker.
1685 _g1->reset_gc_time_stamps(hr); 1686 hr->note_end_of_marking(); 1687 1688 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1689 _freed_bytes += hr->used(); 1690 hr->set_containing_set(NULL); 1691 if (hr->is_humongous()) { 1692 assert(hr->is_starts_humongous(), "we should only see starts humongous"); 1693 _humongous_regions_removed.increment(1u, hr->capacity()); 1694 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1695 } else { 1696 _old_regions_removed.increment(1u, hr->capacity()); 1697 _g1->free_region(hr, _local_cleanup_list, true); 1698 } 1699 } else { 1700 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1701 } 1702 1703 return false; 1704 } 1705 }; 1706 1707 class G1ParNoteEndTask: public AbstractGangTask { 1708 friend class G1NoteEndOfConcMarkClosure; 1709 1710 protected: 1711 G1CollectedHeap* _g1h; 1712 FreeRegionList* _cleanup_list; 1713 HeapRegionClaimer _hrclaimer; 1714 1715 public: 1716 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1717 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1718 } 1719 1720 void work(uint worker_id) { 1721 FreeRegionList local_cleanup_list("Local Cleanup List"); 1722 HRRSCleanupTask hrrs_cleanup_task; 1723 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1724 &hrrs_cleanup_task); 1725 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1726 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1727 1728 // Now update the lists 1729 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1730 { 1731 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1732 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1733 1734 // If we iterate over the global cleanup list at the end of 1735 // cleanup to do this printing, we cannot guarantee that we only 1736 // generate output for the newly-reclaimed regions (the list 1737 // might not be empty at the beginning of cleanup; we might 1738 // still be working on its previous contents). So we do the 1739 // printing here, before we append the new regions to the global 1740 // cleanup list.
1741 1742 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1743 if (hr_printer->is_active()) { 1744 FreeRegionListIterator iter(&local_cleanup_list); 1745 while (iter.more_available()) { 1746 HeapRegion* hr = iter.get_next(); 1747 hr_printer->cleanup(hr); 1748 } 1749 } 1750 1751 _cleanup_list->add_ordered(&local_cleanup_list); 1752 assert(local_cleanup_list.is_empty(), "post-condition"); 1753 1754 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1755 } 1756 } 1757 }; 1758 1759 class G1ParScrubRemSetTask: public AbstractGangTask { 1760 protected: 1761 G1RemSet* _g1rs; 1762 BitMap* _region_bm; 1763 BitMap* _card_bm; 1764 HeapRegionClaimer _hrclaimer; 1765 1766 public: 1767 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1768 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1769 } 1770 1771 void work(uint worker_id) { 1772 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1773 } 1774 1775 }; 1776 1777 void ConcurrentMark::cleanup() { 1778 // world is stopped at this checkpoint 1779 assert(SafepointSynchronize::is_at_safepoint(), 1780 "world should be stopped"); 1781 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1782 1783 // If a full collection has happened, we shouldn't do this. 1784 if (has_aborted()) { 1785 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1786 return; 1787 } 1788 1789 g1h->verify_region_sets_optional(); 1790 1791 if (VerifyDuringGC) { 1792 HandleMark hm; // handle scope 1793 g1h->prepare_for_verify(); 1794 Universe::verify(VerifyOption_G1UsePrevMarking, 1795 " VerifyDuringGC:(before)"); 1796 } 1797 g1h->check_bitmaps("Cleanup Start"); 1798 1799 G1CollectorPolicy* g1p = g1h->g1_policy(); 1800 g1p->record_concurrent_mark_cleanup_start(); 1801 1802 double start = os::elapsedTime(); 1803 1804 HeapRegionRemSet::reset_for_cleanup_tasks(); 1805 1806 // Do counting once more with the world stopped for good measure. 1807 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1808 1809 g1h->workers()->run_task(&g1_par_count_task); 1810 1811 if (VerifyDuringGC) { 1812 // Verify that the counting data accumulated during marking matches 1813 // that calculated by walking the marking bitmap. 1814 1815 // Bitmaps to hold expected values 1816 BitMap expected_region_bm(_region_bm.size(), true); 1817 BitMap expected_card_bm(_card_bm.size(), true); 1818 1819 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1820 &_region_bm, 1821 &_card_bm, 1822 &expected_region_bm, 1823 &expected_card_bm); 1824 1825 g1h->workers()->run_task(&g1_par_verify_task); 1826 1827 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1828 } 1829 1830 size_t start_used_bytes = g1h->used(); 1831 g1h->collector_state()->set_mark_in_progress(false); 1832 1833 double count_end = os::elapsedTime(); 1834 double this_final_counting_time = (count_end - start); 1835 _total_counting_time += this_final_counting_time; 1836 1837 if (G1PrintRegionLivenessInfo) { 1838 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1839 _g1h->heap_region_iterate(&cl); 1840 } 1841 1842 // Install newly created mark bitMap as "prev". 1843 swapMarkBitMaps(); 1844 1845 g1h->reset_gc_time_stamp(); 1846 1847 uint n_workers = _g1h->workers()->active_workers(); 1848 1849 // Note end of marking in all heap regions. 
1850 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); 1851 g1h->workers()->run_task(&g1_par_note_end_task); 1852 g1h->check_gc_time_stamps(); 1853 1854 if (!cleanup_list_is_empty()) { 1855 // The cleanup list is not empty, so we'll have to process it 1856 // concurrently. Notify anyone else that might be wanting free 1857 // regions that there will be more free regions coming soon. 1858 g1h->set_free_regions_coming(); 1859 } 1860 1861 // Scrub the remembered sets, if enabled, before the call below, since 1862 // scrubbing affects the metric by which we sort the heap regions. 1863 if (G1ScrubRemSets) { 1864 double rs_scrub_start = os::elapsedTime(); 1865 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers); 1866 g1h->workers()->run_task(&g1_par_scrub_rs_task); 1867 1868 double rs_scrub_end = os::elapsedTime(); 1869 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 1870 _total_rs_scrub_time += this_rs_scrub_time; 1871 } 1872 1873 // this will also free any regions totally full of garbage objects, 1874 // and sort the regions. 1875 g1h->g1_policy()->record_concurrent_mark_cleanup_end(); 1876 1877 // Statistics. 1878 double end = os::elapsedTime(); 1879 _cleanup_times.add((end - start) * 1000.0); 1880 1881 if (G1Log::fine()) { 1882 g1h->g1_policy()->print_heap_transition(start_used_bytes); 1883 } 1884 1885 // Clean up will have freed any regions completely full of garbage. 1886 // Update the soft reference policy with the new heap occupancy. 1887 Universe::update_heap_info_at_gc(); 1888 1889 if (VerifyDuringGC) { 1890 HandleMark hm; // handle scope 1891 g1h->prepare_for_verify(); 1892 Universe::verify(VerifyOption_G1UsePrevMarking, 1893 " VerifyDuringGC:(after)"); 1894 } 1895 1896 g1h->check_bitmaps("Cleanup End"); 1897 1898 g1h->verify_region_sets_optional(); 1899 1900 // We need to make this a "collection" so any collection pause that 1901 // races with it goes around and waits for completeCleanup to finish. 1902 g1h->increment_total_collections(); 1903 1904 // Clean out dead classes and update Metaspace sizes. 1905 if (ClassUnloadingWithConcurrentMark) { 1906 ClassLoaderDataGraph::purge(); 1907 } 1908 MetaspaceGC::compute_new_size(); 1909 1910 // We reclaimed old regions, so we should recalculate the sizes to make 1911 // sure we update the old gen/space data. 1912 g1h->g1mm()->update_sizes(); 1913 g1h->allocation_context_stats().update_after_mark(); 1914 1915 g1h->trace_heap_after_concurrent_cycle(); 1916 } 1917 1918 void ConcurrentMark::completeCleanup() { 1919 if (has_aborted()) return; 1920 1921 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1922 1923 _cleanup_list.verify_optional(); 1924 FreeRegionList tmp_free_list("Tmp Free List"); 1925 1926 if (G1ConcRegionFreeingVerbose) { 1927 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 1928 "cleanup list has %u entries", 1929 _cleanup_list.length()); 1930 } 1931 1932 // No one else should be accessing the _cleanup_list at this point, 1933 // so it is not necessary to take any locks 1934 while (!_cleanup_list.is_empty()) { 1935 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 1936 assert(hr != NULL, "Got NULL from a non-empty list"); 1937 hr->par_clear(); 1938 tmp_free_list.add_ordered(hr); 1939 1940 // Instead of adding one region at a time to the secondary_free_list, 1941 // we accumulate them in the local list and move them a few at a 1942 // time. This also cuts down on the number of notify_all() calls 1943 // we do during this process.
We'll also append the local list when 1944 // _cleanup_list is empty (which means we just removed the last 1945 // region from the _cleanup_list). 1946 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 1947 _cleanup_list.is_empty()) { 1948 if (G1ConcRegionFreeingVerbose) { 1949 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 1950 "appending %u entries to the secondary_free_list, " 1951 "cleanup list still has %u entries", 1952 tmp_free_list.length(), 1953 _cleanup_list.length()); 1954 } 1955 1956 { 1957 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 1958 g1h->secondary_free_list_add(&tmp_free_list); 1959 SecondaryFreeList_lock->notify_all(); 1960 } 1961 #ifndef PRODUCT 1962 if (G1StressConcRegionFreeing) { 1963 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 1964 os::sleep(Thread::current(), (jlong) 1, false); 1965 } 1966 } 1967 #endif 1968 } 1969 } 1970 assert(tmp_free_list.is_empty(), "post-condition"); 1971 } 1972 1973 // Supporting Object and Oop closures for reference discovery 1974 // and processing during marking 1975 1976 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1977 HeapWord* addr = (HeapWord*)obj; 1978 return addr != NULL && 1979 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 1980 } 1981 1982 // 'Keep Alive' oop closure used by both serial and parallel reference processing. 1983 // Uses the CMTask associated with a worker thread (for serial reference 1984 // processing the CMTask for worker 0 is used) to preserve (mark) and 1985 // trace referent objects. 1986 // 1987 // Using the CMTask and embedded local queues avoids having the worker 1988 // threads operating on the global mark stack. This reduces the risk 1989 // of overflowing the stack - which we would rather avoid at this late 1990 // stage. Also, using the tasks' local queues removes the potential 1991 // for the workers to interfere with each other, which could occur if 1992 // they operated on the global stack. 1993 1994 class G1CMKeepAliveAndDrainClosure: public OopClosure { 1995 ConcurrentMark* _cm; 1996 CMTask* _task; 1997 int _ref_counter_limit; 1998 int _ref_counter; 1999 bool _is_serial; 2000 public: 2001 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2002 _cm(cm), _task(task), _is_serial(is_serial), 2003 _ref_counter_limit(G1RefProcDrainInterval) { 2004 assert(_ref_counter_limit > 0, "sanity"); 2005 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2006 _ref_counter = _ref_counter_limit; 2007 } 2008 2009 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2010 virtual void do_oop( oop* p) { do_oop_work(p); } 2011 2012 template <class T> void do_oop_work(T* p) { 2013 if (!_cm->has_overflown()) { 2014 oop obj = oopDesc::load_decode_heap_oop(p); 2015 _task->deal_with_reference(obj); 2016 _ref_counter--; 2017 2018 if (_ref_counter == 0) { 2019 // We have dealt with _ref_counter_limit references, pushing them 2020 // and objects reachable from them on to the local stack (and 2021 // possibly the global stack). Call CMTask::do_marking_step() to 2022 // process these entries. 2023 // 2024 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2025 // there's nothing more to do (i.e. we're done with the entries that 2026 // were pushed as a result of the CMTask::deal_with_reference() calls 2027 // above) or we overflow.
2028 // 2029 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2030 // flag while there may still be some work to do. (See the comment at 2031 // the beginning of CMTask::do_marking_step() for those conditions - 2032 // one of which is reaching the specified time target.) It is only 2033 // when CMTask::do_marking_step() returns without setting the 2034 // has_aborted() flag that the marking step has completed. 2035 do { 2036 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2037 _task->do_marking_step(mark_step_duration_ms, 2038 false /* do_termination */, 2039 _is_serial); 2040 } while (_task->has_aborted() && !_cm->has_overflown()); 2041 _ref_counter = _ref_counter_limit; 2042 } 2043 } 2044 } 2045 }; 2046 2047 // 'Drain' oop closure used by both serial and parallel reference processing. 2048 // Uses the CMTask associated with a given worker thread (for serial 2049 // reference processing the CMTask for worker 0 is used). Calls the 2050 // do_marking_step routine, with an unbelievably large timeout value, 2051 // to drain the marking data structures of the remaining entries 2052 // added by the 'keep alive' oop closure above. 2053 2054 class G1CMDrainMarkingStackClosure: public VoidClosure { 2055 ConcurrentMark* _cm; 2056 CMTask* _task; 2057 bool _is_serial; 2058 public: 2059 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2060 _cm(cm), _task(task), _is_serial(is_serial) { 2061 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2062 } 2063 2064 void do_void() { 2065 do { 2066 // We call CMTask::do_marking_step() to completely drain the local 2067 // and global marking stacks of entries pushed by the 'keep alive' 2068 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2069 // 2070 // CMTask::do_marking_step() is called in a loop, which we'll exit 2071 // if there's nothing more to do (i.e. we've completely drained the 2072 // entries that were pushed as a result of applying the 'keep alive' 2073 // closure to the entries on the discovered ref lists) or we overflow 2074 // the global marking stack. 2075 // 2076 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2077 // flag while there may still be some work to do. (See the comment at 2078 // the beginning of CMTask::do_marking_step() for those conditions - 2079 // one of which is reaching the specified time target.) It is only 2080 // when CMTask::do_marking_step() returns without setting the 2081 // has_aborted() flag that the marking step has completed. 2082 2083 _task->do_marking_step(1000000000.0 /* something very large */, 2084 true /* do_termination */, 2085 _is_serial); 2086 } while (_task->has_aborted() && !_cm->has_overflown()); 2087 } 2088 }; 2089 2090 // Implementation of AbstractRefProcTaskExecutor for parallel 2091 // reference processing at the end of G1 concurrent marking 2092 2093 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2094 private: 2095 G1CollectedHeap* _g1h; 2096 ConcurrentMark* _cm; 2097 WorkGang* _workers; 2098 uint _active_workers; 2099 2100 public: 2101 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2102 ConcurrentMark* cm, 2103 WorkGang* workers, 2104 uint n_workers) : 2105 _g1h(g1h), _cm(cm), 2106 _workers(workers), _active_workers(n_workers) { } 2107 2108 // Executes the given task using concurrent marking worker threads.
2109 virtual void execute(ProcessTask& task); 2110 virtual void execute(EnqueueTask& task); 2111 }; 2112 2113 class G1CMRefProcTaskProxy: public AbstractGangTask { 2114 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2115 ProcessTask& _proc_task; 2116 G1CollectedHeap* _g1h; 2117 ConcurrentMark* _cm; 2118 2119 public: 2120 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2121 G1CollectedHeap* g1h, 2122 ConcurrentMark* cm) : 2123 AbstractGangTask("Process reference objects in parallel"), 2124 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2125 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2126 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2127 } 2128 2129 virtual void work(uint worker_id) { 2130 ResourceMark rm; 2131 HandleMark hm; 2132 CMTask* task = _cm->task(worker_id); 2133 G1CMIsAliveClosure g1_is_alive(_g1h); 2134 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2135 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2136 2137 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2138 } 2139 }; 2140 2141 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2142 assert(_workers != NULL, "Need parallel worker threads."); 2143 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2144 2145 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2146 2147 // We need to reset the concurrency level before each 2148 // proxy task execution, so that the termination protocol 2149 // and overflow handling in CMTask::do_marking_step() know 2150 // how many workers to wait for. 2151 _cm->set_concurrency(_active_workers); 2152 _workers->run_task(&proc_task_proxy); 2153 } 2154 2155 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2156 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2157 EnqueueTask& _enq_task; 2158 2159 public: 2160 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2161 AbstractGangTask("Enqueue reference objects in parallel"), 2162 _enq_task(enq_task) { } 2163 2164 virtual void work(uint worker_id) { 2165 _enq_task.work(worker_id); 2166 } 2167 }; 2168 2169 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2170 assert(_workers != NULL, "Need parallel worker threads."); 2171 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2172 2173 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2174 2175 // Not strictly necessary but... 2176 // 2177 // We need to reset the concurrency level before each 2178 // proxy task execution, so that the termination protocol 2179 // and overflow handling in CMTask::do_marking_step() know 2180 // how many workers to wait for. 2181 _cm->set_concurrency(_active_workers); 2182 _workers->run_task(&enq_task_proxy); 2183 } 2184 2185 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2186 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2187 } 2188 2189 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2190 if (has_overflown()) { 2191 // Skip processing the discovered references if we have 2192 // overflown the global marking stack. Reference objects 2193 // only get discovered once so it is OK to not 2194 // de-populate the discovered reference lists. We could have, 2195 // but the only benefit would be that, when marking restarts, 2196 // fewer reference objects are discovered.
2197 return; 2198 } 2199 2200 ResourceMark rm; 2201 HandleMark hm; 2202 2203 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2204 2205 // Is alive closure. 2206 G1CMIsAliveClosure g1_is_alive(g1h); 2207 2208 // Inner scope to exclude the cleaning of the string and symbol 2209 // tables from the displayed time. 2210 { 2211 G1CMTraceTime t("GC ref-proc", G1Log::finer()); 2212 2213 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2214 2215 // See the comment in G1CollectedHeap::ref_processing_init() 2216 // about how reference processing currently works in G1. 2217 2218 // Set the soft reference policy 2219 rp->setup_policy(clear_all_soft_refs); 2220 assert(_markStack.isEmpty(), "mark stack should be empty"); 2221 2222 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2223 // in serial reference processing. Note these closures are also 2224 // used for serially processing (by the current thread) the 2225 // JNI references during parallel reference processing. 2226 // 2227 // These closures do not need to synchronize with the worker 2228 // threads involved in parallel reference processing as these 2229 // instances are executed serially by the current thread (i.e. 2230 // reference processing is not multi-threaded and is thus 2231 // performed by the current thread instead of a gang worker). 2232 // 2233 // The gang tasks involved in parallel reference processing create 2234 // their own instances of these closures, which do their own 2235 // synchronization among themselves. 2236 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2237 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2238 2239 // We need at least one active thread. If reference processing 2240 // is not multi-threaded we use the current (VMThread) thread, 2241 // otherwise we use the work gang from the G1CollectedHeap and 2242 // we utilize all the worker threads we can. 2243 bool processing_is_mt = rp->processing_is_mt(); 2244 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2245 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2246 2247 // Parallel processing task executor. 2248 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2249 g1h->workers(), active_workers); 2250 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2251 2252 // Set the concurrency level. The phase was already set prior to 2253 // executing the remark task. 2254 set_concurrency(active_workers); 2255 2256 // Set the degree of MT processing here. If the discovery was done MT, 2257 // the number of threads involved during discovery could differ from 2258 // the number of active workers. This is OK as long as the discovered 2259 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2260 rp->set_active_mt_degree(active_workers); 2261 2262 // Process the weak references. 2263 const ReferenceProcessorStats& stats = 2264 rp->process_discovered_references(&g1_is_alive, 2265 &g1_keep_alive, 2266 &g1_drain_mark_stack, 2267 executor, 2268 g1h->gc_timer_cm()); 2269 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2270 2271 // The do_oop work routines of the keep_alive and drain_marking_stack 2272 // oop closures will set the has_overflown flag if we overflow the 2273 // global marking stack.
2274 2275 assert(_markStack.overflow() || _markStack.isEmpty(), 2276 "mark stack should be empty (unless it overflowed)"); 2277 2278 if (_markStack.overflow()) { 2279 // This should have been done already when we tried to push an 2280 // entry on to the global mark stack. But let's do it again. 2281 set_has_overflown(); 2282 } 2283 2284 assert(rp->num_q() == active_workers, "why not"); 2285 2286 rp->enqueue_discovered_references(executor); 2287 2288 rp->verify_no_references_recorded(); 2289 assert(!rp->discovery_enabled(), "Post condition"); 2290 } 2291 2292 if (has_overflown()) { 2293 // We cannot trust g1_is_alive if the marking stack overflowed 2294 return; 2295 } 2296 2297 assert(_markStack.isEmpty(), "Marking should have completed"); 2298 2299 // Unload Klasses, Strings, Symbols, Code Cache, etc. 2300 { 2301 G1CMTraceTime trace("Unloading", G1Log::finer()); 2302 2303 if (ClassUnloadingWithConcurrentMark) { 2304 bool purged_classes; 2305 2306 { 2307 G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2308 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2309 } 2310 2311 { 2312 G1CMTraceTime trace("Parallel Unloading", G1Log::finest()); 2313 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2314 } 2315 } 2316 2317 if (G1StringDedup::is_enabled()) { 2318 G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2319 G1StringDedup::unlink(&g1_is_alive); 2320 } 2321 } 2322 } 2323 2324 void ConcurrentMark::swapMarkBitMaps() { 2325 CMBitMapRO* temp = _prevMarkBitMap; 2326 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2327 _nextMarkBitMap = (CMBitMap*) temp; 2328 } 2329 2330 // Closure for marking entries in SATB buffers. 2331 class CMSATBBufferClosure : public SATBBufferClosure { 2332 private: 2333 CMTask* _task; 2334 G1CollectedHeap* _g1h; 2335 2336 // This is very similar to CMTask::deal_with_reference, but with 2337 // more relaxed requirements for the argument, so this must be more 2338 // circumspect about treating the argument as an object. 2339 void do_entry(void* entry) const { 2340 _task->increment_refs_reached(); 2341 HeapRegion* hr = _g1h->heap_region_containing_raw(entry); 2342 if (entry < hr->next_top_at_mark_start()) { 2343 // Until we get here, we don't know whether entry refers to a valid 2344 // object; it could instead have been a stale reference.
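// As a concrete illustration (addresses hypothetical): if this region
// has bottom == 0x1000 and next_top_at_mark_start() == 0x2000, an entry
// at 0x1800 lies below NTAMS and is treated as an object and greyed,
// while an entry at 0x2400 is ignored - anything allocated at or above
// NTAMS during marking is implicitly live and never needs to be traced
// through the SATB buffers.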
2345 oop obj = static_cast<oop>(entry); 2346 assert(obj->is_oop(true /* ignore mark word */), 2347 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)); 2348 _task->make_reference_grey(obj, hr); 2349 } 2350 } 2351 2352 public: 2353 CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h) 2354 : _task(task), _g1h(g1h) { } 2355 2356 virtual void do_buffer(void** buffer, size_t size) { 2357 for (size_t i = 0; i < size; ++i) { 2358 do_entry(buffer[i]); 2359 } 2360 } 2361 }; 2362 2363 class G1RemarkThreadsClosure : public ThreadClosure { 2364 CMSATBBufferClosure _cm_satb_cl; 2365 G1CMOopClosure _cm_cl; 2366 MarkingCodeBlobClosure _code_cl; 2367 int _thread_parity; 2368 2369 public: 2370 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) : 2371 _cm_satb_cl(task, g1h), 2372 _cm_cl(g1h, g1h->concurrent_mark(), task), 2373 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 2374 _thread_parity(Threads::thread_claim_parity()) {} 2375 2376 void do_thread(Thread* thread) { 2377 if (thread->is_Java_thread()) { 2378 if (thread->claim_oops_do(true, _thread_parity)) { 2379 JavaThread* jt = (JavaThread*)thread; 2380 2381 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking; 2382 // however, the oops reachable from nmethods have very complex lifecycles: 2383 // * Alive if on the stack of an executing method 2384 // * Weakly reachable otherwise 2385 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 2386 // live by the SATB invariant, but other oops recorded in nmethods may behave differently. 2387 jt->nmethods_do(&_code_cl); 2388 2389 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 2390 } 2391 } else if (thread->is_VM_thread()) { 2392 if (thread->claim_oops_do(true, _thread_parity)) { 2393 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 2394 } 2395 } 2396 } 2397 }; 2398 2399 class CMRemarkTask: public AbstractGangTask { 2400 private: 2401 ConcurrentMark* _cm; 2402 public: 2403 void work(uint worker_id) { 2404 // Since all available tasks are actually started, we should 2405 // only proceed if we're supposed to be active. 2406 if (worker_id < _cm->active_tasks()) { 2407 CMTask* task = _cm->task(worker_id); 2408 task->record_start_time(); 2409 { 2410 ResourceMark rm; 2411 HandleMark hm; 2412 2413 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 2414 Threads::threads_do(&threads_f); 2415 } 2416 2417 do { 2418 task->do_marking_step(1000000000.0 /* something very large */, 2419 true /* do_termination */, 2420 false /* is_serial */); 2421 } while (task->has_aborted() && !_cm->has_overflown()); 2422 // If we overflow, then we do not want to restart. We instead 2423 // want to abort remark and do concurrent marking again.
2424 task->record_end_time(); 2425 } 2426 } 2427 2428 CMRemarkTask(ConcurrentMark* cm, uint active_workers) : 2429 AbstractGangTask("Par Remark"), _cm(cm) { 2430 _cm->terminator()->reset_for_reuse(active_workers); 2431 } 2432 }; 2433 2434 void ConcurrentMark::checkpointRootsFinalWork() { 2435 ResourceMark rm; 2436 HandleMark hm; 2437 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2438 2439 G1CMTraceTime trace("Finalize Marking", G1Log::finer()); 2440 2441 g1h->ensure_parsability(false); 2442 2443 // this is remark, so we'll use up all active threads 2444 uint active_workers = g1h->workers()->active_workers(); 2445 set_concurrency_and_phase(active_workers, false /* concurrent */); 2446 // Leave _parallel_marking_threads at its 2447 // value originally calculated in the ConcurrentMark 2448 // constructor and pass values of the active workers 2449 // through the gang in the task. 2450 2451 { 2452 StrongRootsScope srs(active_workers); 2453 2454 CMRemarkTask remarkTask(this, active_workers); 2455 // We will start all available threads, even if we decide that the 2456 // active_workers will be fewer. The extra ones will just bail out 2457 // immediately. 2458 g1h->workers()->run_task(&remarkTask); 2459 } 2460 2461 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2462 guarantee(has_overflown() || 2463 satb_mq_set.completed_buffers_num() == 0, 2464 "Invariant: has_overflown = %s, num buffers = %d", 2465 BOOL_TO_STR(has_overflown()), 2466 satb_mq_set.completed_buffers_num()); 2467 2468 print_stats(); 2469 } 2470 2471 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2472 // Note we are overriding the read-only view of the prev map here, via 2473 // the cast. 2474 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2475 } 2476 2477 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2478 _nextMarkBitMap->clearRange(mr); 2479 } 2480 2481 HeapRegion* 2482 ConcurrentMark::claim_region(uint worker_id) { 2483 // "checkpoint" the finger 2484 HeapWord* finger = _finger; 2485 2486 // _heap_end will not change underneath our feet; it only changes at 2487 // yield points. 2488 while (finger < _heap_end) { 2489 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2490 2491 // Note on how this code handles humongous regions. In the 2492 // normal case the finger will reach the start of a "starts 2493 // humongous" (SH) region. Its end will either be the end of the 2494 // last "continues humongous" (CH) region in the sequence, or the 2495 // standard end of the SH region (if the SH is the only region in 2496 // the sequence). That way claim_region() will skip over the CH 2497 // regions. However, there is a subtle race between a CM thread 2498 // executing this method and a mutator thread doing a humongous 2499 // object allocation. The two are not mutually exclusive as the CM 2500 // thread does not need to hold the Heap_lock when it gets 2501 // here. So there is a chance that claim_region() will come across 2502 // a free region that's in the process of becoming a SH or a CH 2503 // region. In the former case, it will either 2504 // a) Miss the update to the region's end, in which case it will 2505 // visit every subsequent CH region, will find their bitmaps 2506 // empty, and do nothing, or 2507 // b) Will observe the update of the region's end (in which case 2508 // it will skip the subsequent CH regions). 2509 // If it comes across a region that suddenly becomes CH, the 2510 // scenario will be similar to b).
So, the race between 2511 // claim_region() and a humongous object allocation might force us 2512 // to do a bit of unnecessary work (due to some unnecessary bitmap 2513 // iterations) but it should not introduce any correctness issues. 2514 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 2515 2516 // Above heap_region_containing_raw may return NULL as we always claim 2517 // until the end of the heap. In this case, just jump to the next region. 2518 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 2519 2520 // Is the gap between reading the finger and doing the CAS too long? 2521 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2522 if (res == finger && curr_region != NULL) { 2523 // we succeeded 2524 HeapWord* bottom = curr_region->bottom(); 2525 HeapWord* limit = curr_region->next_top_at_mark_start(); 2526 2527 // notice that _finger == end cannot be guaranteed here since 2528 // someone else might have moved the finger even further 2529 assert(_finger >= end, "the finger should have moved forward"); 2530 2531 if (limit > bottom) { 2532 return curr_region; 2533 } else { 2534 assert(limit == bottom, 2535 "the region limit should be at bottom"); 2536 // we return NULL and the caller should try calling 2537 // claim_region() again. 2538 return NULL; 2539 } 2540 } else { 2541 assert(_finger > finger, "the finger should have moved forward"); 2542 // read it again 2543 finger = _finger; 2544 } 2545 } 2546 2547 return NULL; 2548 } 2549 2550 #ifndef PRODUCT 2551 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2552 private: 2553 G1CollectedHeap* _g1h; 2554 const char* _phase; 2555 int _info; 2556 2557 public: 2558 VerifyNoCSetOops(const char* phase, int info = -1) : 2559 _g1h(G1CollectedHeap::heap()), 2560 _phase(phase), 2561 _info(info) 2562 { } 2563 2564 void operator()(oop obj) const { 2565 guarantee(obj->is_oop(), 2566 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2567 p2i(obj), _phase, _info); 2568 guarantee(!_g1h->obj_in_cs(obj), 2569 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2570 p2i(obj), _phase, _info); 2571 } 2572 }; 2573 2574 void ConcurrentMark::verify_no_cset_oops() { 2575 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2576 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2577 return; 2578 } 2579 2580 // Verify entries on the global mark stack 2581 _markStack.iterate(VerifyNoCSetOops("Stack")); 2582 2583 // Verify entries on the task queues 2584 for (uint i = 0; i < _max_worker_id; ++i) { 2585 CMTaskQueue* queue = _task_queues->queue(i); 2586 queue->iterate(VerifyNoCSetOops("Queue", i)); 2587 } 2588 2589 // Verify the global finger 2590 HeapWord* global_finger = finger(); 2591 if (global_finger != NULL && global_finger < _heap_end) { 2592 // The global finger always points to a heap region boundary. We 2593 // use heap_region_containing_raw() to get the containing region 2594 // given that the global finger could be pointing to a free region 2595 // which subsequently becomes continues humongous. If that 2596 // happens, heap_region_containing() will return the bottom of the 2597 // corresponding starts humongous region and the check below will 2598 // not hold any more. 2599 // Since we always iterate over all regions, we might get a NULL HeapRegion 2600 // here.
2601 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2602 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2603 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2604 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2605 } 2606 2607 // Verify the task fingers 2608 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2609 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2610 CMTask* task = _tasks[i]; 2611 HeapWord* task_finger = task->finger(); 2612 if (task_finger != NULL && task_finger < _heap_end) { 2613 // See above note on the global finger verification. 2614 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2615 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2616 !task_hr->in_collection_set(), 2617 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2618 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2619 } 2620 } 2621 } 2622 #endif // PRODUCT 2623 2624 // Aggregate the counting data that was constructed concurrently 2625 // with marking. 2626 class AggregateCountDataHRClosure: public HeapRegionClosure { 2627 G1CollectedHeap* _g1h; 2628 ConcurrentMark* _cm; 2629 CardTableModRefBS* _ct_bs; 2630 BitMap* _cm_card_bm; 2631 uint _max_worker_id; 2632 2633 public: 2634 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2635 BitMap* cm_card_bm, 2636 uint max_worker_id) : 2637 _g1h(g1h), _cm(g1h->concurrent_mark()), 2638 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())), 2639 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 2640 2641 bool doHeapRegion(HeapRegion* hr) { 2642 if (hr->is_continues_humongous()) { 2643 // We will ignore these here and process them when their 2644 // associated "starts humongous" region is processed. 2645 // Note that we cannot rely on their associated 2646 // "starts humongous" region to have their bit set to 1 2647 // since, due to the region chunking in the parallel region 2648 // iteration, a "continues humongous" region might be visited 2649 // before its associated "starts humongous". 2650 return false; 2651 } 2652 2653 HeapWord* start = hr->bottom(); 2654 HeapWord* limit = hr->next_top_at_mark_start(); 2655 HeapWord* end = hr->end(); 2656 2657 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2658 "Preconditions not met - " 2659 "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", " 2660 "top: " PTR_FORMAT ", end: " PTR_FORMAT, 2661 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())); 2662 2663 assert(hr->next_marked_bytes() == 0, "Precondition"); 2664 2665 if (start == limit) { 2666 // NTAMS of this region has not been set so nothing to do. 2667 return false; 2668 } 2669 2670 // 'start' should be in the heap. 2671 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 2672 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 2673 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 2674 2675 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 2676 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 2677 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 2678 2679 // If ntams is not card aligned then we bump the card bitmap index 2680 // for limit so that we get all the cards spanned by 2681 // the object ending at ntams.
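// For example, with the usual 512-byte cards (so a card index is
// roughly addr >> 9; the exact mapping is card_bitmap_index_for()):
// if limit falls 100 bytes into card 7, card_bitmap_index_for(limit)
// returns 7, and the half-open range [start_idx, limit_idx) would
// exclude card 7 even though the object ending at ntams spills into
// it - hence the bump of limit_idx below.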
2682 // Note: if this is the last region in the heap then ntams 2683 // could actually be just beyond the end of the heap; 2684 // limit_idx will then correspond to a (non-existent) card 2685 // that is also outside the heap. 2686 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 2687 limit_idx += 1; 2688 } 2689 2690 assert(limit_idx <= end_idx, "or else use atomics"); 2691 2692 // Aggregate the "stripe" in the count data associated with hr. 2693 uint hrm_index = hr->hrm_index(); 2694 size_t marked_bytes = 0; 2695 2696 for (uint i = 0; i < _max_worker_id; i += 1) { 2697 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 2698 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 2699 2700 // Fetch the marked_bytes in this region for task i and 2701 // add it to the running total for this region. 2702 marked_bytes += marked_bytes_array[hrm_index]; 2703 2704 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 2705 // into the global card bitmap. 2706 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 2707 2708 while (scan_idx < limit_idx) { 2709 assert(task_card_bm->at(scan_idx) == true, "should be"); 2710 _cm_card_bm->set_bit(scan_idx); 2711 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 2712 2713 // BitMap::get_next_one_offset() can handle the case when 2714 // its left_offset parameter is greater than its right_offset 2715 // parameter. It does, however, have an early exit if 2716 // left_offset == right_offset. So let's limit the value 2717 // passed in for left offset here. 2718 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2719 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2720 } 2721 } 2722 2723 // Update the marked bytes for this region. 2724 hr->add_to_marked_bytes(marked_bytes); 2725 2726 // Next heap region 2727 return false; 2728 } 2729 }; 2730 2731 class G1AggregateCountDataTask: public AbstractGangTask { 2732 protected: 2733 G1CollectedHeap* _g1h; 2734 ConcurrentMark* _cm; 2735 BitMap* _cm_card_bm; 2736 uint _max_worker_id; 2737 uint _active_workers; 2738 HeapRegionClaimer _hrclaimer; 2739 2740 public: 2741 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2742 ConcurrentMark* cm, 2743 BitMap* cm_card_bm, 2744 uint max_worker_id, 2745 uint n_workers) : 2746 AbstractGangTask("Count Aggregation"), 2747 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2748 _max_worker_id(max_worker_id), 2749 _active_workers(n_workers), 2750 _hrclaimer(_active_workers) { 2751 } 2752 2753 void work(uint worker_id) { 2754 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2755 2756 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2757 } 2758 }; 2759 2760 2761 void ConcurrentMark::aggregate_count_data() { 2762 uint n_workers = _g1h->workers()->active_workers(); 2763 2764 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2765 _max_worker_id, n_workers); 2766 2767 _g1h->workers()->run_task(&g1_par_agg_task); 2768 } 2769 2770 // Clear the per-worker arrays used to store the per-region counting data 2771 void ConcurrentMark::clear_all_count_data() { 2772 // Clear the global card bitmap - it will be filled during 2773 // liveness count aggregation (during remark) and the 2774 // final counting task. 2775 _card_bm.clear(); 2776 2777 // Clear the global region bitmap - it will be filled as part 2778 // of the final counting task.
2779 _region_bm.clear(); 2780 2781 uint max_regions = _g1h->max_regions(); 2782 assert(_max_worker_id > 0, "uninitialized"); 2783 2784 for (uint i = 0; i < _max_worker_id; i += 1) { 2785 BitMap* task_card_bm = count_card_bitmap_for(i); 2786 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2787 2788 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2789 assert(marked_bytes_array != NULL, "uninitialized"); 2790 2791 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2792 task_card_bm->clear(); 2793 } 2794 } 2795 2796 void ConcurrentMark::print_stats() { 2797 if (G1MarkingVerboseLevel > 0) { 2798 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2799 for (size_t i = 0; i < _active_tasks; ++i) { 2800 _tasks[i]->print_stats(); 2801 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2802 } 2803 } 2804 } 2805 2806 // abandon current marking iteration due to a Full GC 2807 void ConcurrentMark::abort() { 2808 if (!cmThread()->during_cycle() || _has_aborted) { 2809 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2810 return; 2811 } 2812 2813 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2814 // concurrent bitmap clearing. 2815 _nextMarkBitMap->clearAll(); 2816 2817 // Note we cannot clear the previous marking bitmap here 2818 // since VerifyDuringGC verifies the objects marked during 2819 // a full GC against the previous bitmap. 2820 2821 // Clear the liveness counting data 2822 clear_all_count_data(); 2823 // Empty mark stack 2824 reset_marking_state(); 2825 for (uint i = 0; i < _max_worker_id; ++i) { 2826 _tasks[i]->clear_region_fields(); 2827 } 2828 _first_overflow_barrier_sync.abort(); 2829 _second_overflow_barrier_sync.abort(); 2830 _has_aborted = true; 2831 2832 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2833 satb_mq_set.abandon_partial_marking(); 2834 // This can be called either during or outside marking; we'll read 2835 // the expected_active value from the SATB queue set. 2836 satb_mq_set.set_active_all_threads( 2837 false, /* new active value */ 2838 satb_mq_set.is_active() /* expected_active */); 2839 2840 _g1h->trace_heap_after_concurrent_cycle(); 2841 _g1h->register_concurrent_cycle_end(); 2842 } 2843 2844 static void print_ms_time_info(const char* prefix, const char* name, 2845 NumberSeq& ns) { 2846 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2847 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2848 if (ns.num() > 0) { 2849 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2850 prefix, ns.sd(), ns.maximum()); 2851 } 2852 } 2853 2854 void ConcurrentMark::print_summary_info() { 2855 gclog_or_tty->print_cr(" Concurrent marking:"); 2856 print_ms_time_info(" ", "init marks", _init_times); 2857 print_ms_time_info(" ", "remarks", _remark_times); 2858 { 2859 print_ms_time_info(" ", "final marks", _remark_mark_times); 2860 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2861 2862 } 2863 print_ms_time_info(" ", "cleanups", _cleanup_times); 2864 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2865 _total_counting_time, 2866 (_cleanup_times.num() > 0 ?
_total_counting_time * 1000.0 / 2867 (double)_cleanup_times.num() 2868 : 0.0)); 2869 if (G1ScrubRemSets) { 2870 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2871 _total_rs_scrub_time, 2872 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 2873 (double)_cleanup_times.num() 2874 : 0.0)); 2875 } 2876 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 2877 (_init_times.sum() + _remark_times.sum() + 2878 _cleanup_times.sum())/1000.0); 2879 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 2880 "(%8.2f s marking).", 2881 cmThread()->vtime_accum(), 2882 cmThread()->vtime_mark_accum()); 2883 } 2884 2885 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2886 _parallel_workers->print_worker_threads_on(st); 2887 } 2888 2889 void ConcurrentMark::print_on_error(outputStream* st) const { 2890 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2891 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2892 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2893 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2894 } 2895 2896 // We take a break if someone is trying to stop the world. 2897 bool ConcurrentMark::do_yield_check(uint worker_id) { 2898 if (SuspendibleThreadSet::should_yield()) { 2899 if (worker_id == 0) { 2900 _g1h->g1_policy()->record_concurrent_pause(); 2901 } 2902 SuspendibleThreadSet::yield(); 2903 return true; 2904 } else { 2905 return false; 2906 } 2907 } 2908 2909 // Closure for iteration over bitmaps 2910 class CMBitMapClosure : public BitMapClosure { 2911 private: 2912 // the bitmap that is being iterated over 2913 CMBitMap* _nextMarkBitMap; 2914 ConcurrentMark* _cm; 2915 CMTask* _task; 2916 2917 public: 2918 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 2919 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2920 2921 bool do_bit(size_t offset) { 2922 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2923 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2924 assert( addr < _cm->finger(), "invariant"); 2925 assert(addr >= _task->finger(), "invariant"); 2926 2927 // We move that task's local finger along. 
2928 _task->move_finger_to(addr); 2929 2930 _task->scan_object(oop(addr)); 2931 // we only partially drain the local queue and global stack 2932 _task->drain_local_queue(true); 2933 _task->drain_global_stack(true); 2934 2935 // if the has_aborted flag has been raised, we need to bail out of 2936 // the iteration 2937 return !_task->has_aborted(); 2938 } 2939 }; 2940 2941 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2942 ReferenceProcessor* result = NULL; 2943 if (G1UseConcMarkReferenceProcessing) { 2944 result = g1h->ref_processor_cm(); 2945 assert(result != NULL, "should not be NULL"); 2946 } 2947 return result; 2948 } 2949 2950 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2951 ConcurrentMark* cm, 2952 CMTask* task) 2953 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2954 _g1h(g1h), _cm(cm), _task(task) 2955 { } 2956 2957 void CMTask::setup_for_region(HeapRegion* hr) { 2958 assert(hr != NULL, 2959 "claim_region() should have filtered out NULL regions"); 2960 assert(!hr->is_continues_humongous(), 2961 "claim_region() should have filtered out continues humongous regions"); 2962 _curr_region = hr; 2963 _finger = hr->bottom(); 2964 update_region_limit(); 2965 } 2966 2967 void CMTask::update_region_limit() { 2968 HeapRegion* hr = _curr_region; 2969 HeapWord* bottom = hr->bottom(); 2970 HeapWord* limit = hr->next_top_at_mark_start(); 2971 2972 if (limit == bottom) { 2973 // The region was collected underneath our feet. 2974 // We set the finger to bottom to ensure that the bitmap 2975 // iteration that will follow this will not do anything. 2976 // (this is not a condition that holds when we set the region up, 2977 // as the region is not supposed to be empty in the first place) 2978 _finger = bottom; 2979 } else if (limit >= _region_limit) { 2980 assert(limit >= _finger, "peace of mind"); 2981 } else { 2982 assert(limit < _region_limit, "only way to get here"); 2983 // This can happen under some pretty unusual circumstances. An 2984 // evacuation pause empties the region underneath our feet (NTAMS 2985 // at bottom). We then do some allocation in the region (NTAMS 2986 // stays at bottom), followed by the region being used as a GC 2987 // alloc region (NTAMS will move to top() and the objects 2988 // originally below it will be grayed). All objects now marked in 2989 // the region are explicitly grayed, if below the global finger, 2990 // and in fact we do not need to scan anything else. So, we simply 2991 // set _finger to be limit to ensure that the bitmap iteration 2992 // doesn't do anything. 2993 _finger = limit; 2994 } 2995 2996 _region_limit = limit; 2997 } 2998 2999 void CMTask::giveup_current_region() { 3000 assert(_curr_region != NULL, "invariant"); 3001 clear_region_fields(); 3002 } 3003 3004 void CMTask::clear_region_fields() { 3005 // Values for these three fields that indicate that we're not 3006 // holding on to a region.
3007 _curr_region = NULL; 3008 _finger = NULL; 3009 _region_limit = NULL; 3010 } 3011 3012 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3013 if (cm_oop_closure == NULL) { 3014 assert(_cm_oop_closure != NULL, "invariant"); 3015 } else { 3016 assert(_cm_oop_closure == NULL, "invariant"); 3017 } 3018 _cm_oop_closure = cm_oop_closure; 3019 } 3020 3021 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3022 guarantee(nextMarkBitMap != NULL, "invariant"); 3023 _nextMarkBitMap = nextMarkBitMap; 3024 clear_region_fields(); 3025 3026 _calls = 0; 3027 _elapsed_time_ms = 0.0; 3028 _termination_time_ms = 0.0; 3029 _termination_start_time_ms = 0.0; 3030 } 3031 3032 bool CMTask::should_exit_termination() { 3033 regular_clock_call(); 3034 // This is called when we are in the termination protocol. We should 3035 // quit if, for some reason, this task wants to abort or the global 3036 // stack is not empty (this means that we can get work from it). 3037 return !_cm->mark_stack_empty() || has_aborted(); 3038 } 3039 3040 void CMTask::reached_limit() { 3041 assert(_words_scanned >= _words_scanned_limit || 3042 _refs_reached >= _refs_reached_limit , 3043 "shouldn't have been called otherwise"); 3044 regular_clock_call(); 3045 } 3046 3047 void CMTask::regular_clock_call() { 3048 if (has_aborted()) return; 3049 3050 // First, we need to recalculate the words scanned and refs reached 3051 // limits for the next clock call. 3052 recalculate_limits(); 3053 3054 // During the regular clock call we do the following: 3055 3056 // (1) If an overflow has been flagged, then we abort. 3057 if (_cm->has_overflown()) { 3058 set_has_aborted(); 3059 return; 3060 } 3061 3062 // If we are not concurrent (i.e. we're doing remark) we don't need 3063 // to check anything else. The other steps are only needed during 3064 // the concurrent marking phase. 3065 if (!concurrent()) return; 3066 3067 // (2) If marking has been aborted for Full GC, then we also abort. 3068 if (_cm->has_aborted()) { 3069 set_has_aborted(); 3070 return; 3071 } 3072 3073 double curr_time_ms = os::elapsedVTime() * 1000.0; 3074 3075 // (3) We check whether we should yield. If we have to, then we abort. 3076 if (SuspendibleThreadSet::should_yield()) { 3077 // We should yield. To do this we abort the task. The caller is 3078 // responsible for yielding. 3079 set_has_aborted(); 3080 return; 3081 } 3082 3083 // (4) We check whether we've reached our time quota. If we have, 3084 // then we abort. 3085 double elapsed_time_ms = curr_time_ms - _start_time_ms; 3086 if (elapsed_time_ms > _time_target_ms) { 3087 set_has_aborted(); 3088 _has_timed_out = true; 3089 return; 3090 } 3091 3092 // (5) Finally, we check whether there are enough completed SATB 3093 // buffers available for processing. If there are, we abort.
3094 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3095 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3096 // we do need to process SATB buffers, so we'll abort and restart 3097 // the marking task to do so 3098 set_has_aborted(); 3099 return; 3100 } 3101 } 3102 3103 void CMTask::recalculate_limits() { 3104 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3105 _words_scanned_limit = _real_words_scanned_limit; 3106 3107 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3108 _refs_reached_limit = _real_refs_reached_limit; 3109 } 3110 3111 void CMTask::decrease_limits() { 3112 // This is called when we believe that we're going to do an infrequent 3113 // operation which will increase the per-byte scanned cost (i.e. move 3114 // entries to/from the global stack). It basically tries to decrease the 3115 // scanning limit so that the clock is called earlier. 3116 3117 _words_scanned_limit = _real_words_scanned_limit - 3118 3 * words_scanned_period / 4; 3119 _refs_reached_limit = _real_refs_reached_limit - 3120 3 * refs_reached_period / 4; 3121 } 3122 3123 void CMTask::move_entries_to_global_stack() { 3124 // local array where we'll store the entries that will be popped 3125 // from the local queue 3126 oop buffer[global_stack_transfer_size]; 3127 3128 int n = 0; 3129 oop obj; 3130 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3131 buffer[n] = obj; 3132 ++n; 3133 } 3134 3135 if (n > 0) { 3136 // we popped at least one entry from the local queue 3137 3138 if (!_cm->mark_stack_push(buffer, n)) { 3139 set_has_aborted(); 3140 } 3141 } 3142 3143 // this operation was quite expensive, so decrease the limits 3144 decrease_limits(); 3145 } 3146 3147 void CMTask::get_entries_from_global_stack() { 3148 // local array where we'll store the entries that will be popped 3149 // from the global stack. 3150 oop buffer[global_stack_transfer_size]; 3151 int n; 3152 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3153 assert(n <= global_stack_transfer_size, 3154 "we should not pop more than the given limit"); 3155 if (n > 0) { 3156 // yes, we did actually pop at least one entry 3157 for (int i = 0; i < n; ++i) { 3158 bool success = _task_queue->push(buffer[i]); 3159 // We only call this when the local queue is empty or under a 3160 // given target limit. So, we do not expect this push to fail. 3161 assert(success, "invariant"); 3162 } 3163 } 3164 3165 // this operation was quite expensive, so decrease the limits 3166 decrease_limits(); 3167 } 3168 3169 void CMTask::drain_local_queue(bool partially) { 3170 if (has_aborted()) return; 3171 3172 // Decide what the target size is, depending on whether we're going to 3173 // drain it partially (so that other tasks can steal if they run out 3174 // of things to do) or totally (at the very end).
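// For instance (values illustrative only): with _task_queue->max_elems()
// == 4096 and GCDrainStackTargetSize == 64, a partial drain computes
// target_size = MIN2(4096 / 3, 64) = 64 and keeps popping until at most
// 64 entries remain, leaving work available for other tasks to steal,
// while a total drain uses target_size = 0 and empties the queue.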
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks, or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }
  }
}

// The SATB queue set makes several assumptions about whether to call the
// par or non-par versions of its methods. This is why some of the code
// is replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
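  // Note that regular_clock_call() runs after every buffer, so even
  // while draining, the task still honors its time quota and any
  // pending yield or abort requests.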
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    regular_clock_call();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // Again, this was a potentially expensive operation, so decrease the
  // limits to get the regular clock call early.
  decrease_limits();
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
                         _worker_id, _calls);
  gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());
}

bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
  return _task_queues->steal(worker_id, hash_seed, obj);
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

    (1) Marking Bitmap. If there are gray objects that appear only
    on the bitmap (this happens either when dealing with an overflow
    or when the initial marking phase has simply marked the roots
    and didn't push them on the stack), then tasks claim heap
    regions whose bitmap they then scan to find gray objects. A
    global finger indicates where the end of the last claimed region
    is. A local finger indicates how far into the region a task has
    scanned. The two fingers are used to determine how to gray an
    object (i.e. whether simply marking it is OK, as it will be
    visited by a task in the future, or whether it also needs to be
    pushed on a stack).

    (2) Local Queue. The local queue of the task which is accessed
    reasonably efficiently by the task. Other tasks can steal from
    it when they run out of work. Throughout the marking phase, a
    task attempts to keep its local queue short but not totally
    empty, so that entries are available for stealing by other
    tasks. Only when there is no more work will a task totally
    drain its local queue.

    (3) Global Mark Stack. This handles local queue overflow. During
    marking only sets of entries are moved between it and the local
    queues, as access to it requires a mutex and finer-grained
    interaction with it might cause contention. If it
    overflows, then the marking phase should restart and iterate
    over the bitmap to identify gray objects.
    Throughout the marking
    phase, tasks attempt to keep the global mark stack at a small
    length but not totally empty, so that entries are available for
    popping by other tasks. Only when there is no more work will tasks
    totally drain the global mark stack.

    (4) SATB Buffer Queue. This is where completed SATB buffers are
    made available. Buffers are regularly removed from this queue
    and scanned for roots, so that the queue doesn't get too
    long. During remark, all completed buffers are processed, as
    well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

    (1) When the marking phase has been aborted (after a Full GC).

    (2) When a global overflow (on the global stack) has been
    triggered. Before the task aborts, it will actually sync up with
    the other tasks to ensure that all the marking data structures
    (local queues, stacks, fingers etc.) are re-initialized so that
    when do_marking_step() completes, the marking phase can
    immediately restart.

    (3) When enough completed SATB buffers are available. The
    do_marking_step() method only tries to drain SATB buffers right
    at the beginning. So, if enough buffers are available, the
    marking step aborts and the SATB buffers are processed at
    the beginning of the next invocation.

    (4) To yield. When we have to yield, we abort and yield
    right at the end of do_marking_step(). This saves us from a lot
    of hassle as, by yielding, we might allow a Full GC. If this
    happens then objects will be compacted underneath our feet, the
    heap might shrink, etc. We save checking for this by just
    aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: the clock is called when the number of
    object words the marking phase has scanned or the number of
    references the marking phase has visited reaches a given limit.
    Additional invocations of the clock method have been planted in a
    few other strategic places too. The initial reason for the clock
    method was to avoid calling vtime too regularly, as it is quite
    expensive. So, once it was in place, it was natural to piggy-back
    all the other conditions on it too and not constantly check them
    throughout the code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms = g1_policy->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // Set up the variables that are used in the work-based scheme to
  // call the regular clock method.
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // Clear all flags.
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack.
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure.
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object.
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region == NULL, "invariant");
      assert(_finger == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one.
        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");
    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");
    while (!has_aborted()) {
      oop obj;
      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
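  // Each task repeatedly offers termination; should_exit_termination()
  // (defined above) pulls a task back out of the protocol if the global
  // stack becomes non-empty again or the task needs to abort.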
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // Let's allow task 0 to do this.
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // We need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task. The
      // caller will restart it and we can hopefully find more things
      // to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.
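      // Barrier one ensures that every task has stopped marking before
      // any state is reset; barrier two ensures that every task has
      // finished resetting before any of them restarts.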

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context.
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're during the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
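  // The first line has the form
  //   "### PHASE <phase_name> @ <elapsed seconds>"
  // e.g. "### PHASE Post-Marking @ 2.317" (the phase name shown here is
  // just an illustration; callers supply their own).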
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff",
                 "remset", "code-roots");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                 "(bytes)", "(bytes)");
}

// It takes as a parameter a reference to one of the _hum_* fields, and
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = r->get_type_str();
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  size_t remset_bytes = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
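    // The totals for the whole humongous series are stashed here and
    // then handed back one region's worth at a time by get_hum_bytes()
    // as we visit each region of the series in address order.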
    _hum_capacity_bytes = capacity_bytes;
    _hum_used_bytes = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes += used_bytes;
  _total_capacity_bytes += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, p2i(bottom), p2i(end),
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                 remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usage to the remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset")
                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes),
                 bytes_to_mb(_total_strong_code_roots_bytes));
  _out->cr();
}
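
// For reference, a minimal usage sketch of this closure (illustrative
// only; the phase name and call site below are assumptions, not code
// from this file):
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the table header, doHeapRegion() prints one
// "###"-prefixed line per region, and the destructor prints the
// summary footer.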