1 /* 2 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/metadataOnStackMark.hpp" 27 #include "classfile/symbolTable.hpp" 28 #include "code/codeCache.hpp" 29 #include "gc/g1/concurrentMark.inline.hpp" 30 #include "gc/g1/concurrentMarkThread.inline.hpp" 31 #include "gc/g1/g1CollectedHeap.inline.hpp" 32 #include "gc/g1/g1CollectorPolicy.hpp" 33 #include "gc/g1/g1CollectorState.hpp" 34 #include "gc/g1/g1ErgoVerbose.hpp" 35 #include "gc/g1/g1Log.hpp" 36 #include "gc/g1/g1OopClosures.inline.hpp" 37 #include "gc/g1/g1RemSet.hpp" 38 #include "gc/g1/g1StringDedup.hpp" 39 #include "gc/g1/heapRegion.inline.hpp" 40 #include "gc/g1/heapRegionManager.inline.hpp" 41 #include "gc/g1/heapRegionRemSet.hpp" 42 #include "gc/g1/heapRegionSet.inline.hpp" 43 #include "gc/g1/suspendibleThreadSet.hpp" 44 #include "gc/shared/gcId.hpp" 45 #include "gc/shared/gcTimer.hpp" 46 #include "gc/shared/gcTrace.hpp" 47 #include "gc/shared/gcTraceTime.hpp" 48 #include "gc/shared/genOopClosures.inline.hpp" 49 #include "gc/shared/referencePolicy.hpp" 50 #include "gc/shared/strongRootsScope.hpp" 51 #include "gc/shared/taskqueue.inline.hpp" 52 #include "gc/shared/vmGCOperations.hpp" 53 #include "memory/allocation.hpp" 54 #include "memory/resourceArea.hpp" 55 #include "oops/oop.inline.hpp" 56 #include "runtime/atomic.inline.hpp" 57 #include "runtime/handles.inline.hpp" 58 #include "runtime/java.hpp" 59 #include "runtime/prefetch.inline.hpp" 60 #include "services/memTracker.hpp" 61 62 // Concurrent marking bit map wrapper 63 64 CMBitMapRO::CMBitMapRO(int shifter) : 65 _bm(), 66 _shifter(shifter) { 67 _bmStartWord = 0; 68 _bmWordSize = 0; 69 } 70 71 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr, 72 const HeapWord* limit) const { 73 // First we must round addr *up* to a possible object boundary. 74 addr = (HeapWord*)align_size_up((intptr_t)addr, 75 HeapWordSize << _shifter); 76 size_t addrOffset = heapWordToOffset(addr); 77 if (limit == NULL) { 78 limit = _bmStartWord + _bmWordSize; 79 } 80 size_t limitOffset = heapWordToOffset(limit); 81 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 82 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 83 assert(nextAddr >= addr, "get_next_one postcondition"); 84 assert(nextAddr == limit || isMarked(nextAddr), 85 "get_next_one postcondition"); 86 return nextAddr; 87 } 88 89 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr, 90 const HeapWord* limit) const { 91 size_t addrOffset = heapWordToOffset(addr); 92 if (limit == NULL) { 93 limit = _bmStartWord + _bmWordSize; 94 } 95 size_t limitOffset = heapWordToOffset(limit); 96 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); 97 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 98 assert(nextAddr >= addr, "get_next_one postcondition"); 99 assert(nextAddr == limit || !isMarked(nextAddr), 100 "get_next_one postcondition"); 101 return nextAddr; 102 } 103 104 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { 105 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); 106 return (int) (diff >> _shifter); 107 } 108 109 #ifndef PRODUCT 110 bool CMBitMapRO::covers(MemRegion heap_rs) const { 111 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 112 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize, 113 "size inconsistency"); 114 return _bmStartWord == (HeapWord*)(heap_rs.start()) && 115 _bmWordSize == heap_rs.word_size(); 116 } 117 #endif 118 119 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const { 120 _bm.print_on_error(st, prefix); 121 } 122 123 size_t CMBitMap::compute_size(size_t heap_size) { 124 return ReservedSpace::allocation_align_size_up(heap_size / mark_distance()); 125 } 126 127 size_t CMBitMap::mark_distance() { 128 return MinObjAlignmentInBytes * BitsPerByte; 129 } 130 131 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) { 132 _bmStartWord = heap.start(); 133 _bmWordSize = heap.word_size(); 134 135 _bm.set_map((BitMap::bm_word_t*) storage->reserved().start()); 136 _bm.set_size(_bmWordSize >> _shifter); 137 138 storage->set_mapping_changed_listener(&_listener); 139 } 140 141 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) { 142 if (zero_filled) { 143 return; 144 } 145 // We need to clear the bitmap on commit, removing any existing information. 146 MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords); 147 _bm->clearRange(mr); 148 } 149 150 // Closure used for clearing the given mark bitmap. 151 class ClearBitmapHRClosure : public HeapRegionClosure { 152 private: 153 ConcurrentMark* _cm; 154 CMBitMap* _bitmap; 155 bool _may_yield; // The closure may yield during iteration. If yielded, abort the iteration. 156 public: 157 ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) { 158 assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield."); 159 } 160 161 virtual bool doHeapRegion(HeapRegion* r) { 162 size_t const chunk_size_in_words = M / HeapWordSize; 163 164 HeapWord* cur = r->bottom(); 165 HeapWord* const end = r->end(); 166 167 while (cur < end) { 168 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end)); 169 _bitmap->clearRange(mr); 170 171 cur += chunk_size_in_words; 172 173 // Abort iteration if after yielding the marking has been aborted. 174 if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) { 175 return true; 176 } 177 // Repeat the asserts from before the start of the closure. We will do them 178 // as asserts here to minimize their overhead on the product. However, we 179 // will have them as guarantees at the beginning / end of the bitmap 180 // clearing to get some checking in the product. 181 assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant"); 182 assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant"); 183 } 184 185 return false; 186 } 187 }; 188 189 class ParClearNextMarkBitmapTask : public AbstractGangTask { 190 ClearBitmapHRClosure* _cl; 191 HeapRegionClaimer _hrclaimer; 192 bool _suspendible; // If the task is suspendible, workers must join the STS. 193 194 public: 195 ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) : 196 _cl(cl), _suspendible(suspendible), AbstractGangTask("Parallel Clear Bitmap Task"), _hrclaimer(n_workers) {} 197 198 void work(uint worker_id) { 199 SuspendibleThreadSetJoiner sts_join(_suspendible); 200 G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true); 201 } 202 }; 203 204 void CMBitMap::clearAll() { 205 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 206 ClearBitmapHRClosure cl(NULL, this, false /* may_yield */); 207 uint n_workers = g1h->workers()->active_workers(); 208 ParClearNextMarkBitmapTask task(&cl, n_workers, false); 209 g1h->workers()->run_task(&task); 210 guarantee(cl.complete(), "Must have completed iteration."); 211 return; 212 } 213 214 void CMBitMap::markRange(MemRegion mr) { 215 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 216 assert(!mr.is_empty(), "unexpected empty region"); 217 assert((offsetToHeapWord(heapWordToOffset(mr.end())) == 218 ((HeapWord *) mr.end())), 219 "markRange memory region end is not card aligned"); 220 // convert address range into offset range 221 _bm.at_put_range(heapWordToOffset(mr.start()), 222 heapWordToOffset(mr.end()), true); 223 } 224 225 void CMBitMap::clearRange(MemRegion mr) { 226 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 227 assert(!mr.is_empty(), "unexpected empty region"); 228 // convert address range into offset range 229 _bm.at_put_range(heapWordToOffset(mr.start()), 230 heapWordToOffset(mr.end()), false); 231 } 232 233 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, 234 HeapWord* end_addr) { 235 HeapWord* start = getNextMarkedWordAddress(addr); 236 start = MIN2(start, end_addr); 237 HeapWord* end = getNextUnmarkedWordAddress(start); 238 end = MIN2(end, end_addr); 239 assert(start <= end, "Consistency check"); 240 MemRegion mr(start, end); 241 if (!mr.is_empty()) { 242 clearRange(mr); 243 } 244 return mr; 245 } 246 247 CMMarkStack::CMMarkStack(ConcurrentMark* cm) : 248 _base(NULL), _cm(cm) 249 {} 250 251 bool CMMarkStack::allocate(size_t capacity) { 252 // allocate a stack of the requisite depth 253 ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop))); 254 if (!rs.is_reserved()) { 255 warning("ConcurrentMark MarkStack allocation failure"); 256 return false; 257 } 258 MemTracker::record_virtual_memory_type((address)rs.base(), mtGC); 259 if (!_virtual_space.initialize(rs, rs.size())) { 260 warning("ConcurrentMark MarkStack backing store failure"); 261 // Release the virtual memory reserved for the marking stack 262 rs.release(); 263 return false; 264 } 265 assert(_virtual_space.committed_size() == rs.size(), 266 "Didn't reserve backing store for all of ConcurrentMark stack?"); 267 _base = (oop*) _virtual_space.low(); 268 setEmpty(); 269 _capacity = (jint) capacity; 270 _saved_index = -1; 271 _should_expand = false; 272 return true; 273 } 274 275 void CMMarkStack::expand() { 276 // Called, during remark, if we've overflown the marking stack during marking. 277 assert(isEmpty(), "stack should been emptied while handling overflow"); 278 assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted"); 279 // Clear expansion flag 280 _should_expand = false; 281 if (_capacity == (jint) MarkStackSizeMax) { 282 if (PrintGCDetails && Verbose) { 283 gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit"); 284 } 285 return; 286 } 287 // Double capacity if possible 288 jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax); 289 // Do not give up existing stack until we have managed to 290 // get the double capacity that we desired. 291 ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity * 292 sizeof(oop))); 293 if (rs.is_reserved()) { 294 // Release the backing store associated with old stack 295 _virtual_space.release(); 296 // Reinitialize virtual space for new stack 297 if (!_virtual_space.initialize(rs, rs.size())) { 298 fatal("Not enough swap for expanded marking stack capacity"); 299 } 300 _base = (oop*)(_virtual_space.low()); 301 _index = 0; 302 _capacity = new_capacity; 303 } else { 304 if (PrintGCDetails && Verbose) { 305 // Failed to double capacity, continue; 306 gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from " 307 SIZE_FORMAT "K to " SIZE_FORMAT "K", 308 _capacity / K, new_capacity / K); 309 } 310 } 311 } 312 313 void CMMarkStack::set_should_expand() { 314 // If we're resetting the marking state because of an 315 // marking stack overflow, record that we should, if 316 // possible, expand the stack. 317 _should_expand = _cm->has_overflown(); 318 } 319 320 CMMarkStack::~CMMarkStack() { 321 if (_base != NULL) { 322 _base = NULL; 323 _virtual_space.release(); 324 } 325 } 326 327 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { 328 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 329 jint start = _index; 330 jint next_index = start + n; 331 if (next_index > _capacity) { 332 _overflow = true; 333 return; 334 } 335 // Otherwise. 336 _index = next_index; 337 for (int i = 0; i < n; i++) { 338 int ind = start + i; 339 assert(ind < _capacity, "By overflow test above."); 340 _base[ind] = ptr_arr[i]; 341 } 342 } 343 344 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { 345 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 346 jint index = _index; 347 if (index == 0) { 348 *n = 0; 349 return false; 350 } else { 351 int k = MIN2(max, index); 352 jint new_ind = index - k; 353 for (int j = 0; j < k; j++) { 354 ptr_arr[j] = _base[new_ind + j]; 355 } 356 _index = new_ind; 357 *n = k; 358 return true; 359 } 360 } 361 362 void CMMarkStack::note_start_of_gc() { 363 assert(_saved_index == -1, 364 "note_start_of_gc()/end_of_gc() bracketed incorrectly"); 365 _saved_index = _index; 366 } 367 368 void CMMarkStack::note_end_of_gc() { 369 // This is intentionally a guarantee, instead of an assert. If we 370 // accidentally add something to the mark stack during GC, it 371 // will be a correctness issue so it's better if we crash. we'll 372 // only check this once per GC anyway, so it won't be a performance 373 // issue in any way. 374 guarantee(_saved_index == _index, 375 "saved index: %d index: %d", _saved_index, _index); 376 _saved_index = -1; 377 } 378 379 CMRootRegions::CMRootRegions() : 380 _young_list(NULL), _cm(NULL), _scan_in_progress(false), 381 _should_abort(false), _next_survivor(NULL) { } 382 383 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) { 384 _young_list = g1h->young_list(); 385 _cm = cm; 386 } 387 388 void CMRootRegions::prepare_for_scan() { 389 assert(!scan_in_progress(), "pre-condition"); 390 391 // Currently, only survivors can be root regions. 392 assert(_next_survivor == NULL, "pre-condition"); 393 _next_survivor = _young_list->first_survivor_region(); 394 _scan_in_progress = (_next_survivor != NULL); 395 _should_abort = false; 396 } 397 398 HeapRegion* CMRootRegions::claim_next() { 399 if (_should_abort) { 400 // If someone has set the should_abort flag, we return NULL to 401 // force the caller to bail out of their loop. 402 return NULL; 403 } 404 405 // Currently, only survivors can be root regions. 406 HeapRegion* res = _next_survivor; 407 if (res != NULL) { 408 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 409 // Read it again in case it changed while we were waiting for the lock. 410 res = _next_survivor; 411 if (res != NULL) { 412 if (res == _young_list->last_survivor_region()) { 413 // We just claimed the last survivor so store NULL to indicate 414 // that we're done. 415 _next_survivor = NULL; 416 } else { 417 _next_survivor = res->get_next_young_region(); 418 } 419 } else { 420 // Someone else claimed the last survivor while we were trying 421 // to take the lock so nothing else to do. 422 } 423 } 424 assert(res == NULL || res->is_survivor(), "post-condition"); 425 426 return res; 427 } 428 429 void CMRootRegions::scan_finished() { 430 assert(scan_in_progress(), "pre-condition"); 431 432 // Currently, only survivors can be root regions. 433 if (!_should_abort) { 434 assert(_next_survivor == NULL, "we should have claimed all survivors"); 435 } 436 _next_survivor = NULL; 437 438 { 439 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 440 _scan_in_progress = false; 441 RootRegionScan_lock->notify_all(); 442 } 443 } 444 445 bool CMRootRegions::wait_until_scan_finished() { 446 if (!scan_in_progress()) return false; 447 448 { 449 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 450 while (scan_in_progress()) { 451 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 452 } 453 } 454 return true; 455 } 456 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list 459 #endif // _MSC_VER 460 461 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) { 462 return MAX2((n_par_threads + 2) / 4, 1U); 463 } 464 465 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) : 466 _g1h(g1h), 467 _markBitMap1(), 468 _markBitMap2(), 469 _parallel_marking_threads(0), 470 _max_parallel_marking_threads(0), 471 _sleep_factor(0.0), 472 _marking_task_overhead(1.0), 473 _cleanup_sleep_factor(0.0), 474 _cleanup_task_overhead(1.0), 475 _cleanup_list("Cleanup List"), 476 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/), 477 _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >> 478 CardTableModRefBS::card_shift, 479 false /* in_resource_area*/), 480 481 _prevMarkBitMap(&_markBitMap1), 482 _nextMarkBitMap(&_markBitMap2), 483 484 _markStack(this), 485 // _finger set in set_non_marking_state 486 487 _max_worker_id(ParallelGCThreads), 488 // _active_tasks set in set_non_marking_state 489 // _tasks set inside the constructor 490 _task_queues(new CMTaskQueueSet((int) _max_worker_id)), 491 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)), 492 493 _has_overflown(false), 494 _concurrent(false), 495 _has_aborted(false), 496 _restart_for_overflow(false), 497 _concurrent_marking_in_progress(false), 498 499 // _verbose_level set below 500 501 _init_times(), 502 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 503 _cleanup_times(), 504 _total_counting_time(0.0), 505 _total_rs_scrub_time(0.0), 506 507 _parallel_workers(NULL), 508 509 _count_card_bitmaps(NULL), 510 _count_marked_bytes(NULL), 511 _completed_initialization(false) { 512 513 _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage); 514 _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage); 515 516 // Create & start a ConcurrentMark thread. 517 _cmThread = new ConcurrentMarkThread(this); 518 assert(cmThread() != NULL, "CM Thread should have been created"); 519 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 520 if (_cmThread->osthread() == NULL) { 521 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread"); 522 } 523 524 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 525 assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency"); 526 assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency"); 527 528 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 529 satb_qs.set_buffer_size(G1SATBBufferSize); 530 531 _root_regions.init(_g1h, this); 532 533 if (ConcGCThreads > ParallelGCThreads) { 534 warning("Can't have more ConcGCThreads (%u) " 535 "than ParallelGCThreads (%u).", 536 ConcGCThreads, ParallelGCThreads); 537 return; 538 } 539 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) { 540 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent 541 // if both are set 542 _sleep_factor = 0.0; 543 _marking_task_overhead = 1.0; 544 } else if (G1MarkingOverheadPercent > 0) { 545 // We will calculate the number of parallel marking threads based 546 // on a target overhead with respect to the soft real-time goal 547 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 548 double overall_cm_overhead = 549 (double) MaxGCPauseMillis * marking_overhead / 550 (double) GCPauseIntervalMillis; 551 double cpu_ratio = 1.0 / (double) os::processor_count(); 552 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 553 double marking_task_overhead = 554 overall_cm_overhead / marking_thread_num * 555 (double) os::processor_count(); 556 double sleep_factor = 557 (1.0 - marking_task_overhead) / marking_task_overhead; 558 559 FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num); 560 _sleep_factor = sleep_factor; 561 _marking_task_overhead = marking_task_overhead; 562 } else { 563 // Calculate the number of parallel marking threads by scaling 564 // the number of parallel GC threads. 565 uint marking_thread_num = scale_parallel_threads(ParallelGCThreads); 566 FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num); 567 _sleep_factor = 0.0; 568 _marking_task_overhead = 1.0; 569 } 570 571 assert(ConcGCThreads > 0, "Should have been set"); 572 _parallel_marking_threads = ConcGCThreads; 573 _max_parallel_marking_threads = _parallel_marking_threads; 574 575 if (parallel_marking_threads() > 1) { 576 _cleanup_task_overhead = 1.0; 577 } else { 578 _cleanup_task_overhead = marking_task_overhead(); 579 } 580 _cleanup_sleep_factor = 581 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); 582 583 #if 0 584 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); 585 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); 586 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); 587 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead()); 588 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); 589 #endif 590 591 _parallel_workers = new WorkGang("G1 Marker", 592 _max_parallel_marking_threads, false, true); 593 if (_parallel_workers == NULL) { 594 vm_exit_during_initialization("Failed necessary allocation."); 595 } else { 596 _parallel_workers->initialize_workers(); 597 } 598 599 if (FLAG_IS_DEFAULT(MarkStackSize)) { 600 size_t mark_stack_size = 601 MIN2(MarkStackSizeMax, 602 MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE))); 603 // Verify that the calculated value for MarkStackSize is in range. 604 // It would be nice to use the private utility routine from Arguments. 605 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { 606 warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): " 607 "must be between 1 and " SIZE_FORMAT, 608 mark_stack_size, MarkStackSizeMax); 609 return; 610 } 611 FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size); 612 } else { 613 // Verify MarkStackSize is in range. 614 if (FLAG_IS_CMDLINE(MarkStackSize)) { 615 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { 616 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 617 warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): " 618 "must be between 1 and " SIZE_FORMAT, 619 MarkStackSize, MarkStackSizeMax); 620 return; 621 } 622 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) { 623 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 624 warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")" 625 " or for MarkStackSizeMax (" SIZE_FORMAT ")", 626 MarkStackSize, MarkStackSizeMax); 627 return; 628 } 629 } 630 } 631 } 632 633 if (!_markStack.allocate(MarkStackSize)) { 634 warning("Failed to allocate CM marking stack"); 635 return; 636 } 637 638 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC); 639 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); 640 641 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC); 642 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC); 643 644 BitMap::idx_t card_bm_size = _card_bm.size(); 645 646 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 647 _active_tasks = _max_worker_id; 648 649 uint max_regions = _g1h->max_regions(); 650 for (uint i = 0; i < _max_worker_id; ++i) { 651 CMTaskQueue* task_queue = new CMTaskQueue(); 652 task_queue->initialize(); 653 _task_queues->register_queue(i, task_queue); 654 655 _count_card_bitmaps[i] = BitMap(card_bm_size, false); 656 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC); 657 658 _tasks[i] = new CMTask(i, this, 659 _count_marked_bytes[i], 660 &_count_card_bitmaps[i], 661 task_queue, _task_queues); 662 663 _accum_task_vtime[i] = 0.0; 664 } 665 666 // Calculate the card number for the bottom of the heap. Used 667 // in biasing indexes into the accounting card bitmaps. 668 _heap_bottom_card_num = 669 intptr_t(uintptr_t(_g1h->reserved_region().start()) >> 670 CardTableModRefBS::card_shift); 671 672 // Clear all the liveness counting data 673 clear_all_count_data(); 674 675 // so that the call below can read a sensible value 676 _heap_start = g1h->reserved_region().start(); 677 set_non_marking_state(); 678 _completed_initialization = true; 679 } 680 681 void ConcurrentMark::reset() { 682 // Starting values for these two. This should be called in a STW 683 // phase. 684 MemRegion reserved = _g1h->g1_reserved(); 685 _heap_start = reserved.start(); 686 _heap_end = reserved.end(); 687 688 // Separated the asserts so that we know which one fires. 689 assert(_heap_start != NULL, "heap bounds should look ok"); 690 assert(_heap_end != NULL, "heap bounds should look ok"); 691 assert(_heap_start < _heap_end, "heap bounds should look ok"); 692 693 // Reset all the marking data structures and any necessary flags 694 reset_marking_state(); 695 696 // We do reset all of them, since different phases will use 697 // different number of active threads. So, it's easiest to have all 698 // of them ready. 699 for (uint i = 0; i < _max_worker_id; ++i) { 700 _tasks[i]->reset(_nextMarkBitMap); 701 } 702 703 // we need this to make sure that the flag is on during the evac 704 // pause with initial mark piggy-backed 705 set_concurrent_marking_in_progress(); 706 } 707 708 709 void ConcurrentMark::reset_marking_state(bool clear_overflow) { 710 _markStack.set_should_expand(); 711 _markStack.setEmpty(); // Also clears the _markStack overflow flag 712 if (clear_overflow) { 713 clear_has_overflown(); 714 } else { 715 assert(has_overflown(), "pre-condition"); 716 } 717 _finger = _heap_start; 718 719 for (uint i = 0; i < _max_worker_id; ++i) { 720 CMTaskQueue* queue = _task_queues->queue(i); 721 queue->set_empty(); 722 } 723 } 724 725 void ConcurrentMark::set_concurrency(uint active_tasks) { 726 assert(active_tasks <= _max_worker_id, "we should not have more"); 727 728 _active_tasks = active_tasks; 729 // Need to update the three data structures below according to the 730 // number of active threads for this phase. 731 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 732 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 733 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 734 } 735 736 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) { 737 set_concurrency(active_tasks); 738 739 _concurrent = concurrent; 740 // We propagate this to all tasks, not just the active ones. 741 for (uint i = 0; i < _max_worker_id; ++i) 742 _tasks[i]->set_concurrent(concurrent); 743 744 if (concurrent) { 745 set_concurrent_marking_in_progress(); 746 } else { 747 // We currently assume that the concurrent flag has been set to 748 // false before we start remark. At this point we should also be 749 // in a STW phase. 750 assert(!concurrent_marking_in_progress(), "invariant"); 751 assert(out_of_regions(), 752 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT, 753 p2i(_finger), p2i(_heap_end)); 754 } 755 } 756 757 void ConcurrentMark::set_non_marking_state() { 758 // We set the global marking state to some default values when we're 759 // not doing marking. 760 reset_marking_state(); 761 _active_tasks = 0; 762 clear_concurrent_marking_in_progress(); 763 } 764 765 ConcurrentMark::~ConcurrentMark() { 766 // The ConcurrentMark instance is never freed. 767 ShouldNotReachHere(); 768 } 769 770 void ConcurrentMark::clearNextBitmap() { 771 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 772 773 // Make sure that the concurrent mark thread looks to still be in 774 // the current cycle. 775 guarantee(cmThread()->during_cycle(), "invariant"); 776 777 // We are finishing up the current cycle by clearing the next 778 // marking bitmap and getting it ready for the next cycle. During 779 // this time no other cycle can start. So, let's make sure that this 780 // is the case. 781 guarantee(!g1h->collector_state()->mark_in_progress(), "invariant"); 782 783 ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */); 784 ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true); 785 _parallel_workers->run_task(&task); 786 787 // Clear the liveness counting data. If the marking has been aborted, the abort() 788 // call already did that. 789 if (cl.complete()) { 790 clear_all_count_data(); 791 } 792 793 // Repeat the asserts from above. 794 guarantee(cmThread()->during_cycle(), "invariant"); 795 guarantee(!g1h->collector_state()->mark_in_progress(), "invariant"); 796 } 797 798 class CheckBitmapClearHRClosure : public HeapRegionClosure { 799 CMBitMap* _bitmap; 800 bool _error; 801 public: 802 CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) { 803 } 804 805 virtual bool doHeapRegion(HeapRegion* r) { 806 // This closure can be called concurrently to the mutator, so we must make sure 807 // that the result of the getNextMarkedWordAddress() call is compared to the 808 // value passed to it as limit to detect any found bits. 809 HeapWord* end = r->end(); 810 return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end; 811 } 812 }; 813 814 bool ConcurrentMark::nextMarkBitmapIsClear() { 815 CheckBitmapClearHRClosure cl(_nextMarkBitMap); 816 _g1h->heap_region_iterate(&cl); 817 return cl.complete(); 818 } 819 820 class NoteStartOfMarkHRClosure: public HeapRegionClosure { 821 public: 822 bool doHeapRegion(HeapRegion* r) { 823 r->note_start_of_marking(); 824 return false; 825 } 826 }; 827 828 void ConcurrentMark::checkpointRootsInitialPre() { 829 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 830 G1CollectorPolicy* g1p = g1h->g1_policy(); 831 832 _has_aborted = false; 833 834 // Initialize marking structures. This has to be done in a STW phase. 835 reset(); 836 837 // For each region note start of marking. 838 NoteStartOfMarkHRClosure startcl; 839 g1h->heap_region_iterate(&startcl); 840 } 841 842 843 void ConcurrentMark::checkpointRootsInitialPost() { 844 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 845 846 // If we force an overflow during remark, the remark operation will 847 // actually abort and we'll restart concurrent marking. If we always 848 // force an overflow during remark we'll never actually complete the 849 // marking phase. So, we initialize this here, at the start of the 850 // cycle, so that at the remaining overflow number will decrease at 851 // every remark and we'll eventually not need to cause one. 852 force_overflow_stw()->init(); 853 854 // Start Concurrent Marking weak-reference discovery. 855 ReferenceProcessor* rp = g1h->ref_processor_cm(); 856 // enable ("weak") refs discovery 857 rp->enable_discovery(); 858 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 859 860 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 861 // This is the start of the marking cycle, we're expected all 862 // threads to have SATB queues with active set to false. 863 satb_mq_set.set_active_all_threads(true, /* new active value */ 864 false /* expected_active */); 865 866 _root_regions.prepare_for_scan(); 867 868 // update_g1_committed() will be called at the end of an evac pause 869 // when marking is on. So, it's also called at the end of the 870 // initial-mark pause to update the heap end, if the heap expands 871 // during it. No need to call it here. 872 } 873 874 /* 875 * Notice that in the next two methods, we actually leave the STS 876 * during the barrier sync and join it immediately afterwards. If we 877 * do not do this, the following deadlock can occur: one thread could 878 * be in the barrier sync code, waiting for the other thread to also 879 * sync up, whereas another one could be trying to yield, while also 880 * waiting for the other threads to sync up too. 881 * 882 * Note, however, that this code is also used during remark and in 883 * this case we should not attempt to leave / enter the STS, otherwise 884 * we'll either hit an assert (debug / fastdebug) or deadlock 885 * (product). So we should only leave / enter the STS if we are 886 * operating concurrently. 887 * 888 * Because the thread that does the sync barrier has left the STS, it 889 * is possible to be suspended for a Full GC or an evacuation pause 890 * could occur. This is actually safe, since the entering the sync 891 * barrier is one of the last things do_marking_step() does, and it 892 * doesn't manipulate any data structures afterwards. 893 */ 894 895 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 896 bool barrier_aborted; 897 { 898 SuspendibleThreadSetLeaver sts_leave(concurrent()); 899 barrier_aborted = !_first_overflow_barrier_sync.enter(); 900 } 901 902 // at this point everyone should have synced up and not be doing any 903 // more work 904 905 if (barrier_aborted) { 906 // If the barrier aborted we ignore the overflow condition and 907 // just abort the whole marking phase as quickly as possible. 908 return; 909 } 910 911 // If we're executing the concurrent phase of marking, reset the marking 912 // state; otherwise the marking state is reset after reference processing, 913 // during the remark pause. 914 // If we reset here as a result of an overflow during the remark we will 915 // see assertion failures from any subsequent set_concurrency_and_phase() 916 // calls. 917 if (concurrent()) { 918 // let the task associated with with worker 0 do this 919 if (worker_id == 0) { 920 // task 0 is responsible for clearing the global data structures 921 // We should be here because of an overflow. During STW we should 922 // not clear the overflow flag since we rely on it being true when 923 // we exit this method to abort the pause and restart concurrent 924 // marking. 925 reset_marking_state(true /* clear_overflow */); 926 force_overflow()->update(); 927 928 if (G1Log::fine()) { 929 gclog_or_tty->gclog_stamp(); 930 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); 931 } 932 } 933 } 934 935 // after this, each task should reset its own data structures then 936 // then go into the second barrier 937 } 938 939 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) { 940 SuspendibleThreadSetLeaver sts_leave(concurrent()); 941 _second_overflow_barrier_sync.enter(); 942 943 // at this point everything should be re-initialized and ready to go 944 } 945 946 #ifndef PRODUCT 947 void ForceOverflowSettings::init() { 948 _num_remaining = G1ConcMarkForceOverflow; 949 _force = false; 950 update(); 951 } 952 953 void ForceOverflowSettings::update() { 954 if (_num_remaining > 0) { 955 _num_remaining -= 1; 956 _force = true; 957 } else { 958 _force = false; 959 } 960 } 961 962 bool ForceOverflowSettings::should_force() { 963 if (_force) { 964 _force = false; 965 return true; 966 } else { 967 return false; 968 } 969 } 970 #endif // !PRODUCT 971 972 class CMConcurrentMarkingTask: public AbstractGangTask { 973 private: 974 ConcurrentMark* _cm; 975 ConcurrentMarkThread* _cmt; 976 977 public: 978 void work(uint worker_id) { 979 assert(Thread::current()->is_ConcurrentGC_thread(), 980 "this should only be done by a conc GC thread"); 981 ResourceMark rm; 982 983 double start_vtime = os::elapsedVTime(); 984 985 { 986 SuspendibleThreadSetJoiner sts_join; 987 988 assert(worker_id < _cm->active_tasks(), "invariant"); 989 CMTask* the_task = _cm->task(worker_id); 990 the_task->record_start_time(); 991 if (!_cm->has_aborted()) { 992 do { 993 double start_vtime_sec = os::elapsedVTime(); 994 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 995 996 the_task->do_marking_step(mark_step_duration_ms, 997 true /* do_termination */, 998 false /* is_serial*/); 999 1000 double end_vtime_sec = os::elapsedVTime(); 1001 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 1002 _cm->clear_has_overflown(); 1003 1004 _cm->do_yield_check(worker_id); 1005 1006 jlong sleep_time_ms; 1007 if (!_cm->has_aborted() && the_task->has_aborted()) { 1008 sleep_time_ms = 1009 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 1010 { 1011 SuspendibleThreadSetLeaver sts_leave; 1012 os::sleep(Thread::current(), sleep_time_ms, false); 1013 } 1014 } 1015 } while (!_cm->has_aborted() && the_task->has_aborted()); 1016 } 1017 the_task->record_end_time(); 1018 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 1019 } 1020 1021 double end_vtime = os::elapsedVTime(); 1022 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime); 1023 } 1024 1025 CMConcurrentMarkingTask(ConcurrentMark* cm, 1026 ConcurrentMarkThread* cmt) : 1027 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 1028 1029 ~CMConcurrentMarkingTask() { } 1030 }; 1031 1032 // Calculates the number of active workers for a concurrent 1033 // phase. 1034 uint ConcurrentMark::calc_parallel_marking_threads() { 1035 uint n_conc_workers = 0; 1036 if (!UseDynamicNumberOfGCThreads || 1037 (!FLAG_IS_DEFAULT(ConcGCThreads) && 1038 !ForceDynamicNumberOfGCThreads)) { 1039 n_conc_workers = max_parallel_marking_threads(); 1040 } else { 1041 n_conc_workers = 1042 AdaptiveSizePolicy::calc_default_active_workers( 1043 max_parallel_marking_threads(), 1044 1, /* Minimum workers */ 1045 parallel_marking_threads(), 1046 Threads::number_of_non_daemon_threads()); 1047 // Don't scale down "n_conc_workers" by scale_parallel_threads() because 1048 // that scaling has already gone into "_max_parallel_marking_threads". 1049 } 1050 assert(n_conc_workers > 0, "Always need at least 1"); 1051 return n_conc_workers; 1052 } 1053 1054 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) { 1055 // Currently, only survivors can be root regions. 1056 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 1057 G1RootRegionScanClosure cl(_g1h, this, worker_id); 1058 1059 const uintx interval = PrefetchScanIntervalInBytes; 1060 HeapWord* curr = hr->bottom(); 1061 const HeapWord* end = hr->top(); 1062 while (curr < end) { 1063 Prefetch::read(curr, interval); 1064 oop obj = oop(curr); 1065 int size = obj->oop_iterate_size(&cl); 1066 assert(size == obj->size(), "sanity"); 1067 curr += size; 1068 } 1069 } 1070 1071 class CMRootRegionScanTask : public AbstractGangTask { 1072 private: 1073 ConcurrentMark* _cm; 1074 1075 public: 1076 CMRootRegionScanTask(ConcurrentMark* cm) : 1077 AbstractGangTask("Root Region Scan"), _cm(cm) { } 1078 1079 void work(uint worker_id) { 1080 assert(Thread::current()->is_ConcurrentGC_thread(), 1081 "this should only be done by a conc GC thread"); 1082 1083 CMRootRegions* root_regions = _cm->root_regions(); 1084 HeapRegion* hr = root_regions->claim_next(); 1085 while (hr != NULL) { 1086 _cm->scanRootRegion(hr, worker_id); 1087 hr = root_regions->claim_next(); 1088 } 1089 } 1090 }; 1091 1092 void ConcurrentMark::scanRootRegions() { 1093 double scan_start = os::elapsedTime(); 1094 1095 // Start of concurrent marking. 1096 ClassLoaderDataGraph::clear_claimed_marks(); 1097 1098 // scan_in_progress() will have been set to true only if there was 1099 // at least one root region to scan. So, if it's false, we 1100 // should not attempt to do any further work. 1101 if (root_regions()->scan_in_progress()) { 1102 if (G1Log::fine()) { 1103 gclog_or_tty->gclog_stamp(); 1104 gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]"); 1105 } 1106 1107 _parallel_marking_threads = calc_parallel_marking_threads(); 1108 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 1109 "Maximum number of marking threads exceeded"); 1110 uint active_workers = MAX2(1U, parallel_marking_threads()); 1111 1112 CMRootRegionScanTask task(this); 1113 _parallel_workers->set_active_workers(active_workers); 1114 _parallel_workers->run_task(&task); 1115 1116 if (G1Log::fine()) { 1117 gclog_or_tty->gclog_stamp(); 1118 gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start); 1119 } 1120 1121 // It's possible that has_aborted() is true here without actually 1122 // aborting the survivor scan earlier. This is OK as it's 1123 // mainly used for sanity checking. 1124 root_regions()->scan_finished(); 1125 } 1126 } 1127 1128 void ConcurrentMark::markFromRoots() { 1129 // we might be tempted to assert that: 1130 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1131 // "inconsistent argument?"); 1132 // However that wouldn't be right, because it's possible that 1133 // a safepoint is indeed in progress as a younger generation 1134 // stop-the-world GC happens even as we mark in this generation. 1135 1136 _restart_for_overflow = false; 1137 force_overflow_conc()->init(); 1138 1139 // _g1h has _n_par_threads 1140 _parallel_marking_threads = calc_parallel_marking_threads(); 1141 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 1142 "Maximum number of marking threads exceeded"); 1143 1144 uint active_workers = MAX2(1U, parallel_marking_threads()); 1145 assert(active_workers > 0, "Should have been set"); 1146 1147 // Parallel task terminator is set in "set_concurrency_and_phase()" 1148 set_concurrency_and_phase(active_workers, true /* concurrent */); 1149 1150 CMConcurrentMarkingTask markingTask(this, cmThread()); 1151 _parallel_workers->set_active_workers(active_workers); 1152 _parallel_workers->run_task(&markingTask); 1153 print_stats(); 1154 } 1155 1156 // Helper class to get rid of some boilerplate code. 1157 class G1CMTraceTime : public StackObj { 1158 GCTraceTimeImpl _gc_trace_time; 1159 static bool doit_and_prepend(bool doit) { 1160 if (doit) { 1161 gclog_or_tty->put(' '); 1162 } 1163 return doit; 1164 } 1165 1166 public: 1167 G1CMTraceTime(const char* title, bool doit) 1168 : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) { 1169 } 1170 }; 1171 1172 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1173 // world is stopped at this checkpoint 1174 assert(SafepointSynchronize::is_at_safepoint(), 1175 "world should be stopped"); 1176 1177 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1178 1179 // If a full collection has happened, we shouldn't do this. 1180 if (has_aborted()) { 1181 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1182 return; 1183 } 1184 1185 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1186 1187 if (VerifyDuringGC) { 1188 HandleMark hm; // handle scope 1189 g1h->prepare_for_verify(); 1190 Universe::verify(VerifyOption_G1UsePrevMarking, 1191 " VerifyDuringGC:(before)"); 1192 } 1193 g1h->check_bitmaps("Remark Start"); 1194 1195 G1CollectorPolicy* g1p = g1h->g1_policy(); 1196 g1p->record_concurrent_mark_remark_start(); 1197 1198 double start = os::elapsedTime(); 1199 1200 checkpointRootsFinalWork(); 1201 1202 double mark_work_end = os::elapsedTime(); 1203 1204 weakRefsWork(clear_all_soft_refs); 1205 1206 if (has_overflown()) { 1207 // Oops. We overflowed. Restart concurrent marking. 1208 _restart_for_overflow = true; 1209 if (G1TraceMarkStackOverflow) { 1210 gclog_or_tty->print_cr("\nRemark led to restart for overflow."); 1211 } 1212 1213 // Verify the heap w.r.t. the previous marking bitmap. 1214 if (VerifyDuringGC) { 1215 HandleMark hm; // handle scope 1216 g1h->prepare_for_verify(); 1217 Universe::verify(VerifyOption_G1UsePrevMarking, 1218 " VerifyDuringGC:(overflow)"); 1219 } 1220 1221 // Clear the marking state because we will be restarting 1222 // marking due to overflowing the global mark stack. 1223 reset_marking_state(); 1224 } else { 1225 { 1226 G1CMTraceTime trace("GC aggregate-data", G1Log::finer()); 1227 1228 // Aggregate the per-task counting data that we have accumulated 1229 // while marking. 1230 aggregate_count_data(); 1231 } 1232 1233 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1234 // We're done with marking. 1235 // This is the end of the marking cycle, we're expected all 1236 // threads to have SATB queues with active set to true. 1237 satb_mq_set.set_active_all_threads(false, /* new active value */ 1238 true /* expected_active */); 1239 1240 if (VerifyDuringGC) { 1241 HandleMark hm; // handle scope 1242 g1h->prepare_for_verify(); 1243 Universe::verify(VerifyOption_G1UseNextMarking, 1244 " VerifyDuringGC:(after)"); 1245 } 1246 g1h->check_bitmaps("Remark End"); 1247 assert(!restart_for_overflow(), "sanity"); 1248 // Completely reset the marking state since marking completed 1249 set_non_marking_state(); 1250 } 1251 1252 // Expand the marking stack, if we have to and if we can. 1253 if (_markStack.should_expand()) { 1254 _markStack.expand(); 1255 } 1256 1257 // Statistics 1258 double now = os::elapsedTime(); 1259 _remark_mark_times.add((mark_work_end - start) * 1000.0); 1260 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1261 _remark_times.add((now - start) * 1000.0); 1262 1263 g1p->record_concurrent_mark_remark_end(); 1264 1265 G1CMIsAliveClosure is_alive(g1h); 1266 g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive); 1267 } 1268 1269 // Base class of the closures that finalize and verify the 1270 // liveness counting data. 1271 class CMCountDataClosureBase: public HeapRegionClosure { 1272 protected: 1273 G1CollectedHeap* _g1h; 1274 ConcurrentMark* _cm; 1275 CardTableModRefBS* _ct_bs; 1276 1277 BitMap* _region_bm; 1278 BitMap* _card_bm; 1279 1280 // Takes a region that's not empty (i.e., it has at least one 1281 // live object in it and sets its corresponding bit on the region 1282 // bitmap to 1. 1283 void set_bit_for_region(HeapRegion* hr) { 1284 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index(); 1285 _region_bm->par_at_put(index, true); 1286 } 1287 1288 public: 1289 CMCountDataClosureBase(G1CollectedHeap* g1h, 1290 BitMap* region_bm, BitMap* card_bm): 1291 _g1h(g1h), _cm(g1h->concurrent_mark()), 1292 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())), 1293 _region_bm(region_bm), _card_bm(card_bm) { } 1294 }; 1295 1296 // Closure that calculates the # live objects per region. Used 1297 // for verification purposes during the cleanup pause. 1298 class CalcLiveObjectsClosure: public CMCountDataClosureBase { 1299 CMBitMapRO* _bm; 1300 size_t _region_marked_bytes; 1301 1302 public: 1303 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h, 1304 BitMap* region_bm, BitMap* card_bm) : 1305 CMCountDataClosureBase(g1h, region_bm, card_bm), 1306 _bm(bm), _region_marked_bytes(0) { } 1307 1308 bool doHeapRegion(HeapRegion* hr) { 1309 HeapWord* ntams = hr->next_top_at_mark_start(); 1310 HeapWord* start = hr->bottom(); 1311 1312 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(), 1313 "Preconditions not met - " 1314 "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT, 1315 p2i(start), p2i(ntams), p2i(hr->end())); 1316 1317 // Find the first marked object at or after "start". 1318 start = _bm->getNextMarkedWordAddress(start, ntams); 1319 1320 size_t marked_bytes = 0; 1321 1322 while (start < ntams) { 1323 oop obj = oop(start); 1324 int obj_sz = obj->size(); 1325 HeapWord* obj_end = start + obj_sz; 1326 1327 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 1328 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end); 1329 1330 // Note: if we're looking at the last region in heap - obj_end 1331 // could be actually just beyond the end of the heap; end_idx 1332 // will then correspond to a (non-existent) card that is also 1333 // just beyond the heap. 1334 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) { 1335 // end of object is not card aligned - increment to cover 1336 // all the cards spanned by the object 1337 end_idx += 1; 1338 } 1339 1340 // Set the bits in the card BM for the cards spanned by this object. 1341 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1342 1343 // Add the size of this object to the number of marked bytes. 1344 marked_bytes += (size_t)obj_sz * HeapWordSize; 1345 1346 if (obj_end > hr->end()) { 1347 break; 1348 } 1349 // Find the next marked object after this one. 1350 start = _bm->getNextMarkedWordAddress(obj_end, ntams); 1351 } 1352 1353 // Mark the allocated-since-marking portion... 1354 HeapWord* top = hr->top(); 1355 if (ntams < top) { 1356 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1357 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1358 1359 // Note: if we're looking at the last region in heap - top 1360 // could be actually just beyond the end of the heap; end_idx 1361 // will then correspond to a (non-existent) card that is also 1362 // just beyond the heap. 1363 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1364 // end of object is not card aligned - increment to cover 1365 // all the cards spanned by the object 1366 end_idx += 1; 1367 } 1368 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1369 1370 // This definitely means the region has live objects. 1371 set_bit_for_region(hr); 1372 } 1373 1374 // Update the live region bitmap. 1375 if (marked_bytes > 0) { 1376 set_bit_for_region(hr); 1377 } 1378 1379 // Set the marked bytes for the current region so that 1380 // it can be queried by a calling verification routine 1381 _region_marked_bytes = marked_bytes; 1382 1383 return false; 1384 } 1385 1386 size_t region_marked_bytes() const { return _region_marked_bytes; } 1387 }; 1388 1389 // Heap region closure used for verifying the counting data 1390 // that was accumulated concurrently and aggregated during 1391 // the remark pause. This closure is applied to the heap 1392 // regions during the STW cleanup pause. 1393 1394 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure { 1395 G1CollectedHeap* _g1h; 1396 ConcurrentMark* _cm; 1397 CalcLiveObjectsClosure _calc_cl; 1398 BitMap* _region_bm; // Region BM to be verified 1399 BitMap* _card_bm; // Card BM to be verified 1400 1401 BitMap* _exp_region_bm; // Expected Region BM values 1402 BitMap* _exp_card_bm; // Expected card BM values 1403 1404 int _failures; 1405 1406 public: 1407 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h, 1408 BitMap* region_bm, 1409 BitMap* card_bm, 1410 BitMap* exp_region_bm, 1411 BitMap* exp_card_bm) : 1412 _g1h(g1h), _cm(g1h->concurrent_mark()), 1413 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm), 1414 _region_bm(region_bm), _card_bm(card_bm), 1415 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm), 1416 _failures(0) { } 1417 1418 int failures() const { return _failures; } 1419 1420 bool doHeapRegion(HeapRegion* hr) { 1421 int failures = 0; 1422 1423 // Call the CalcLiveObjectsClosure to walk the marking bitmap for 1424 // this region and set the corresponding bits in the expected region 1425 // and card bitmaps. 1426 bool res = _calc_cl.doHeapRegion(hr); 1427 assert(res == false, "should be continuing"); 1428 1429 // Verify the marked bytes for this region. 1430 size_t exp_marked_bytes = _calc_cl.region_marked_bytes(); 1431 size_t act_marked_bytes = hr->next_marked_bytes(); 1432 1433 // We're not OK if expected marked bytes > actual marked bytes. It means 1434 // we have missed accounting some objects during the actual marking. 1435 if (exp_marked_bytes > act_marked_bytes) { 1436 failures += 1; 1437 } 1438 1439 // Verify the bit, for this region, in the actual and expected 1440 // (which was just calculated) region bit maps. 1441 // We're not OK if the bit in the calculated expected region 1442 // bitmap is set and the bit in the actual region bitmap is not. 1443 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index(); 1444 1445 bool expected = _exp_region_bm->at(index); 1446 bool actual = _region_bm->at(index); 1447 if (expected && !actual) { 1448 failures += 1; 1449 } 1450 1451 // Verify that the card bit maps for the cards spanned by the current 1452 // region match. We have an error if we have a set bit in the expected 1453 // bit map and the corresponding bit in the actual bitmap is not set. 1454 1455 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1456 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1457 1458 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1459 expected = _exp_card_bm->at(i); 1460 actual = _card_bm->at(i); 1461 1462 if (expected && !actual) { 1463 failures += 1; 1464 } 1465 } 1466 1467 _failures += failures; 1468 1469 // We could stop iteration over the heap when we 1470 // find the first violating region by returning true. 1471 return false; 1472 } 1473 }; 1474 1475 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1476 protected: 1477 G1CollectedHeap* _g1h; 1478 ConcurrentMark* _cm; 1479 BitMap* _actual_region_bm; 1480 BitMap* _actual_card_bm; 1481 1482 uint _n_workers; 1483 1484 BitMap* _expected_region_bm; 1485 BitMap* _expected_card_bm; 1486 1487 int _failures; 1488 1489 HeapRegionClaimer _hrclaimer; 1490 1491 public: 1492 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1493 BitMap* region_bm, BitMap* card_bm, 1494 BitMap* expected_region_bm, BitMap* expected_card_bm) 1495 : AbstractGangTask("G1 verify final counting"), 1496 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1497 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1498 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1499 _failures(0), 1500 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1501 assert(VerifyDuringGC, "don't call this otherwise"); 1502 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1503 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1504 } 1505 1506 void work(uint worker_id) { 1507 assert(worker_id < _n_workers, "invariant"); 1508 1509 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1510 _actual_region_bm, _actual_card_bm, 1511 _expected_region_bm, 1512 _expected_card_bm); 1513 1514 _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer); 1515 1516 Atomic::add(verify_cl.failures(), &_failures); 1517 } 1518 1519 int failures() const { return _failures; } 1520 }; 1521 1522 // Closure that finalizes the liveness counting data. 1523 // Used during the cleanup pause. 1524 // Sets the bits corresponding to the interval [NTAMS, top] 1525 // (which contains the implicitly live objects) in the 1526 // card liveness bitmap. Also sets the bit for each region, 1527 // containing live data, in the region liveness bitmap. 1528 1529 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1530 public: 1531 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1532 BitMap* region_bm, 1533 BitMap* card_bm) : 1534 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1535 1536 bool doHeapRegion(HeapRegion* hr) { 1537 HeapWord* ntams = hr->next_top_at_mark_start(); 1538 HeapWord* top = hr->top(); 1539 1540 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1541 1542 // Mark the allocated-since-marking portion... 1543 if (ntams < top) { 1544 // This definitely means the region has live objects. 1545 set_bit_for_region(hr); 1546 1547 // Now set the bits in the card bitmap for [ntams, top) 1548 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1549 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1550 1551 // Note: if we're looking at the last region in heap - top 1552 // could be actually just beyond the end of the heap; end_idx 1553 // will then correspond to a (non-existent) card that is also 1554 // just beyond the heap. 1555 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1556 // end of object is not card aligned - increment to cover 1557 // all the cards spanned by the object 1558 end_idx += 1; 1559 } 1560 1561 assert(end_idx <= _card_bm->size(), 1562 "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1563 end_idx, _card_bm->size()); 1564 assert(start_idx < _card_bm->size(), 1565 "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1566 start_idx, _card_bm->size()); 1567 1568 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1569 } 1570 1571 // Set the bit for the region if it contains live data 1572 if (hr->next_marked_bytes() > 0) { 1573 set_bit_for_region(hr); 1574 } 1575 1576 return false; 1577 } 1578 }; 1579 1580 class G1ParFinalCountTask: public AbstractGangTask { 1581 protected: 1582 G1CollectedHeap* _g1h; 1583 ConcurrentMark* _cm; 1584 BitMap* _actual_region_bm; 1585 BitMap* _actual_card_bm; 1586 1587 uint _n_workers; 1588 HeapRegionClaimer _hrclaimer; 1589 1590 public: 1591 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1592 : AbstractGangTask("G1 final counting"), 1593 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1594 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1595 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1596 } 1597 1598 void work(uint worker_id) { 1599 assert(worker_id < _n_workers, "invariant"); 1600 1601 FinalCountDataUpdateClosure final_update_cl(_g1h, 1602 _actual_region_bm, 1603 _actual_card_bm); 1604 1605 _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer); 1606 } 1607 }; 1608 1609 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1610 G1CollectedHeap* _g1; 1611 size_t _freed_bytes; 1612 FreeRegionList* _local_cleanup_list; 1613 HeapRegionSetCount _old_regions_removed; 1614 HeapRegionSetCount _humongous_regions_removed; 1615 HRRSCleanupTask* _hrrs_cleanup_task; 1616 1617 public: 1618 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1619 FreeRegionList* local_cleanup_list, 1620 HRRSCleanupTask* hrrs_cleanup_task) : 1621 _g1(g1), 1622 _freed_bytes(0), 1623 _local_cleanup_list(local_cleanup_list), 1624 _old_regions_removed(), 1625 _humongous_regions_removed(), 1626 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1627 1628 size_t freed_bytes() { return _freed_bytes; } 1629 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1630 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1631 1632 bool doHeapRegion(HeapRegion *hr) { 1633 if (hr->is_archive()) { 1634 return false; 1635 } 1636 // We use a claim value of zero here because all regions 1637 // were claimed with value 1 in the FinalCount task. 1638 _g1->reset_gc_time_stamps(hr); 1639 hr->note_end_of_marking(); 1640 1641 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1642 _freed_bytes += hr->used(); 1643 hr->set_containing_set(NULL); 1644 if (hr->is_humongous()) { 1645 _humongous_regions_removed.increment(1u, hr->capacity()); 1646 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1647 } else { 1648 _old_regions_removed.increment(1u, hr->capacity()); 1649 _g1->free_region(hr, _local_cleanup_list, true); 1650 } 1651 } else { 1652 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1653 } 1654 1655 return false; 1656 } 1657 }; 1658 1659 class G1ParNoteEndTask: public AbstractGangTask { 1660 friend class G1NoteEndOfConcMarkClosure; 1661 1662 protected: 1663 G1CollectedHeap* _g1h; 1664 FreeRegionList* _cleanup_list; 1665 HeapRegionClaimer _hrclaimer; 1666 1667 public: 1668 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1669 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1670 } 1671 1672 void work(uint worker_id) { 1673 FreeRegionList local_cleanup_list("Local Cleanup List"); 1674 HRRSCleanupTask hrrs_cleanup_task; 1675 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1676 &hrrs_cleanup_task); 1677 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1678 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1679 1680 // Now update the lists 1681 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1682 { 1683 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1684 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1685 1686 // If we iterate over the global cleanup list at the end of 1687 // cleanup to do this printing we will not guarantee to only 1688 // generate output for the newly-reclaimed regions (the list 1689 // might not be empty at the beginning of cleanup; we might 1690 // still be working on its previous contents). So we do the 1691 // printing here, before we append the new regions to the global 1692 // cleanup list. 1693 1694 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1695 if (hr_printer->is_active()) { 1696 FreeRegionListIterator iter(&local_cleanup_list); 1697 while (iter.more_available()) { 1698 HeapRegion* hr = iter.get_next(); 1699 hr_printer->cleanup(hr); 1700 } 1701 } 1702 1703 _cleanup_list->add_ordered(&local_cleanup_list); 1704 assert(local_cleanup_list.is_empty(), "post-condition"); 1705 1706 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1707 } 1708 } 1709 }; 1710 1711 class G1ParScrubRemSetTask: public AbstractGangTask { 1712 protected: 1713 G1RemSet* _g1rs; 1714 BitMap* _region_bm; 1715 BitMap* _card_bm; 1716 HeapRegionClaimer _hrclaimer; 1717 1718 public: 1719 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1720 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1721 } 1722 1723 void work(uint worker_id) { 1724 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1725 } 1726 1727 }; 1728 1729 void ConcurrentMark::cleanup() { 1730 // world is stopped at this checkpoint 1731 assert(SafepointSynchronize::is_at_safepoint(), 1732 "world should be stopped"); 1733 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1734 1735 // If a full collection has happened, we shouldn't do this. 1736 if (has_aborted()) { 1737 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1738 return; 1739 } 1740 1741 g1h->verify_region_sets_optional(); 1742 1743 if (VerifyDuringGC) { 1744 HandleMark hm; // handle scope 1745 g1h->prepare_for_verify(); 1746 Universe::verify(VerifyOption_G1UsePrevMarking, 1747 " VerifyDuringGC:(before)"); 1748 } 1749 g1h->check_bitmaps("Cleanup Start"); 1750 1751 G1CollectorPolicy* g1p = g1h->g1_policy(); 1752 g1p->record_concurrent_mark_cleanup_start(); 1753 1754 double start = os::elapsedTime(); 1755 1756 HeapRegionRemSet::reset_for_cleanup_tasks(); 1757 1758 // Do counting once more with the world stopped for good measure. 1759 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1760 1761 g1h->workers()->run_task(&g1_par_count_task); 1762 1763 if (VerifyDuringGC) { 1764 // Verify that the counting data accumulated during marking matches 1765 // that calculated by walking the marking bitmap. 1766 1767 // Bitmaps to hold expected values 1768 BitMap expected_region_bm(_region_bm.size(), true); 1769 BitMap expected_card_bm(_card_bm.size(), true); 1770 1771 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1772 &_region_bm, 1773 &_card_bm, 1774 &expected_region_bm, 1775 &expected_card_bm); 1776 1777 g1h->workers()->run_task(&g1_par_verify_task); 1778 1779 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1780 } 1781 1782 size_t start_used_bytes = g1h->used(); 1783 g1h->collector_state()->set_mark_in_progress(false); 1784 1785 double count_end = os::elapsedTime(); 1786 double this_final_counting_time = (count_end - start); 1787 _total_counting_time += this_final_counting_time; 1788 1789 if (G1PrintRegionLivenessInfo) { 1790 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1791 _g1h->heap_region_iterate(&cl); 1792 } 1793 1794 // Install newly created mark bitMap as "prev". 1795 swapMarkBitMaps(); 1796 1797 g1h->reset_gc_time_stamp(); 1798 1799 uint n_workers = _g1h->workers()->active_workers(); 1800 1801 // Note end of marking in all heap regions. 1802 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); 1803 g1h->workers()->run_task(&g1_par_note_end_task); 1804 g1h->check_gc_time_stamps(); 1805 1806 if (!cleanup_list_is_empty()) { 1807 // The cleanup list is not empty, so we'll have to process it 1808 // concurrently. Notify anyone else that might be wanting free 1809 // regions that there will be more free regions coming soon. 1810 g1h->set_free_regions_coming(); 1811 } 1812 1813 // call below, since it affects the metric by which we sort the heap 1814 // regions. 1815 if (G1ScrubRemSets) { 1816 double rs_scrub_start = os::elapsedTime(); 1817 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers); 1818 g1h->workers()->run_task(&g1_par_scrub_rs_task); 1819 1820 double rs_scrub_end = os::elapsedTime(); 1821 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 1822 _total_rs_scrub_time += this_rs_scrub_time; 1823 } 1824 1825 // this will also free any regions totally full of garbage objects, 1826 // and sort the regions. 1827 g1h->g1_policy()->record_concurrent_mark_cleanup_end(); 1828 1829 // Statistics. 1830 double end = os::elapsedTime(); 1831 _cleanup_times.add((end - start) * 1000.0); 1832 1833 if (G1Log::fine()) { 1834 g1h->g1_policy()->print_heap_transition(start_used_bytes); 1835 } 1836 1837 // Clean up will have freed any regions completely full of garbage. 1838 // Update the soft reference policy with the new heap occupancy. 1839 Universe::update_heap_info_at_gc(); 1840 1841 if (VerifyDuringGC) { 1842 HandleMark hm; // handle scope 1843 g1h->prepare_for_verify(); 1844 Universe::verify(VerifyOption_G1UsePrevMarking, 1845 " VerifyDuringGC:(after)"); 1846 } 1847 1848 g1h->check_bitmaps("Cleanup End"); 1849 1850 g1h->verify_region_sets_optional(); 1851 1852 // We need to make this be a "collection" so any collection pause that 1853 // races with it goes around and waits for completeCleanup to finish. 1854 g1h->increment_total_collections(); 1855 1856 // Clean out dead classes and update Metaspace sizes. 1857 if (ClassUnloadingWithConcurrentMark) { 1858 ClassLoaderDataGraph::purge(); 1859 } 1860 MetaspaceGC::compute_new_size(); 1861 1862 // We reclaimed old regions so we should calculate the sizes to make 1863 // sure we update the old gen/space data. 1864 g1h->g1mm()->update_sizes(); 1865 g1h->allocation_context_stats().update_after_mark(); 1866 1867 g1h->trace_heap_after_concurrent_cycle(); 1868 } 1869 1870 void ConcurrentMark::completeCleanup() { 1871 if (has_aborted()) return; 1872 1873 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1874 1875 _cleanup_list.verify_optional(); 1876 FreeRegionList tmp_free_list("Tmp Free List"); 1877 1878 if (G1ConcRegionFreeingVerbose) { 1879 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 1880 "cleanup list has %u entries", 1881 _cleanup_list.length()); 1882 } 1883 1884 // No one else should be accessing the _cleanup_list at this point, 1885 // so it is not necessary to take any locks 1886 while (!_cleanup_list.is_empty()) { 1887 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 1888 assert(hr != NULL, "Got NULL from a non-empty list"); 1889 hr->par_clear(); 1890 tmp_free_list.add_ordered(hr); 1891 1892 // Instead of adding one region at a time to the secondary_free_list, 1893 // we accumulate them in the local list and move them a few at a 1894 // time. This also cuts down on the number of notify_all() calls 1895 // we do during this process. We'll also append the local list when 1896 // _cleanup_list is empty (which means we just removed the last 1897 // region from the _cleanup_list). 1898 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 1899 _cleanup_list.is_empty()) { 1900 if (G1ConcRegionFreeingVerbose) { 1901 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 1902 "appending %u entries to the secondary_free_list, " 1903 "cleanup list still has %u entries", 1904 tmp_free_list.length(), 1905 _cleanup_list.length()); 1906 } 1907 1908 { 1909 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 1910 g1h->secondary_free_list_add(&tmp_free_list); 1911 SecondaryFreeList_lock->notify_all(); 1912 } 1913 #ifndef PRODUCT 1914 if (G1StressConcRegionFreeing) { 1915 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 1916 os::sleep(Thread::current(), (jlong) 1, false); 1917 } 1918 } 1919 #endif 1920 } 1921 } 1922 assert(tmp_free_list.is_empty(), "post-condition"); 1923 } 1924 1925 // Supporting Object and Oop closures for reference discovery 1926 // and processing in during marking 1927 1928 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1929 HeapWord* addr = (HeapWord*)obj; 1930 return addr != NULL && 1931 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 1932 } 1933 1934 // 'Keep Alive' oop closure used by both serial parallel reference processing. 1935 // Uses the CMTask associated with a worker thread (for serial reference 1936 // processing the CMTask for worker 0 is used) to preserve (mark) and 1937 // trace referent objects. 1938 // 1939 // Using the CMTask and embedded local queues avoids having the worker 1940 // threads operating on the global mark stack. This reduces the risk 1941 // of overflowing the stack - which we would rather avoid at this late 1942 // state. Also using the tasks' local queues removes the potential 1943 // of the workers interfering with each other that could occur if 1944 // operating on the global stack. 1945 1946 class G1CMKeepAliveAndDrainClosure: public OopClosure { 1947 ConcurrentMark* _cm; 1948 CMTask* _task; 1949 int _ref_counter_limit; 1950 int _ref_counter; 1951 bool _is_serial; 1952 public: 1953 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 1954 _cm(cm), _task(task), _is_serial(is_serial), 1955 _ref_counter_limit(G1RefProcDrainInterval) { 1956 assert(_ref_counter_limit > 0, "sanity"); 1957 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1958 _ref_counter = _ref_counter_limit; 1959 } 1960 1961 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1962 virtual void do_oop( oop* p) { do_oop_work(p); } 1963 1964 template <class T> void do_oop_work(T* p) { 1965 if (!_cm->has_overflown()) { 1966 oop obj = oopDesc::load_decode_heap_oop(p); 1967 _task->deal_with_reference(obj); 1968 _ref_counter--; 1969 1970 if (_ref_counter == 0) { 1971 // We have dealt with _ref_counter_limit references, pushing them 1972 // and objects reachable from them on to the local stack (and 1973 // possibly the global stack). Call CMTask::do_marking_step() to 1974 // process these entries. 1975 // 1976 // We call CMTask::do_marking_step() in a loop, which we'll exit if 1977 // there's nothing more to do (i.e. we're done with the entries that 1978 // were pushed as a result of the CMTask::deal_with_reference() calls 1979 // above) or we overflow. 1980 // 1981 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 1982 // flag while there may still be some work to do. (See the comment at 1983 // the beginning of CMTask::do_marking_step() for those conditions - 1984 // one of which is reaching the specified time target.) It is only 1985 // when CMTask::do_marking_step() returns without setting the 1986 // has_aborted() flag that the marking step has completed. 1987 do { 1988 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1989 _task->do_marking_step(mark_step_duration_ms, 1990 false /* do_termination */, 1991 _is_serial); 1992 } while (_task->has_aborted() && !_cm->has_overflown()); 1993 _ref_counter = _ref_counter_limit; 1994 } 1995 } 1996 } 1997 }; 1998 1999 // 'Drain' oop closure used by both serial and parallel reference processing. 2000 // Uses the CMTask associated with a given worker thread (for serial 2001 // reference processing the CMtask for worker 0 is used). Calls the 2002 // do_marking_step routine, with an unbelievably large timeout value, 2003 // to drain the marking data structures of the remaining entries 2004 // added by the 'keep alive' oop closure above. 2005 2006 class G1CMDrainMarkingStackClosure: public VoidClosure { 2007 ConcurrentMark* _cm; 2008 CMTask* _task; 2009 bool _is_serial; 2010 public: 2011 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2012 _cm(cm), _task(task), _is_serial(is_serial) { 2013 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2014 } 2015 2016 void do_void() { 2017 do { 2018 // We call CMTask::do_marking_step() to completely drain the local 2019 // and global marking stacks of entries pushed by the 'keep alive' 2020 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2021 // 2022 // CMTask::do_marking_step() is called in a loop, which we'll exit 2023 // if there's nothing more to do (i.e. we've completely drained the 2024 // entries that were pushed as a a result of applying the 'keep alive' 2025 // closure to the entries on the discovered ref lists) or we overflow 2026 // the global marking stack. 2027 // 2028 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2029 // flag while there may still be some work to do. (See the comment at 2030 // the beginning of CMTask::do_marking_step() for those conditions - 2031 // one of which is reaching the specified time target.) It is only 2032 // when CMTask::do_marking_step() returns without setting the 2033 // has_aborted() flag that the marking step has completed. 2034 2035 _task->do_marking_step(1000000000.0 /* something very large */, 2036 true /* do_termination */, 2037 _is_serial); 2038 } while (_task->has_aborted() && !_cm->has_overflown()); 2039 } 2040 }; 2041 2042 // Implementation of AbstractRefProcTaskExecutor for parallel 2043 // reference processing at the end of G1 concurrent marking 2044 2045 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2046 private: 2047 G1CollectedHeap* _g1h; 2048 ConcurrentMark* _cm; 2049 WorkGang* _workers; 2050 uint _active_workers; 2051 2052 public: 2053 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2054 ConcurrentMark* cm, 2055 WorkGang* workers, 2056 uint n_workers) : 2057 _g1h(g1h), _cm(cm), 2058 _workers(workers), _active_workers(n_workers) { } 2059 2060 // Executes the given task using concurrent marking worker threads. 2061 virtual void execute(ProcessTask& task); 2062 virtual void execute(EnqueueTask& task); 2063 }; 2064 2065 class G1CMRefProcTaskProxy: public AbstractGangTask { 2066 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2067 ProcessTask& _proc_task; 2068 G1CollectedHeap* _g1h; 2069 ConcurrentMark* _cm; 2070 2071 public: 2072 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2073 G1CollectedHeap* g1h, 2074 ConcurrentMark* cm) : 2075 AbstractGangTask("Process reference objects in parallel"), 2076 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2077 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2078 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2079 } 2080 2081 virtual void work(uint worker_id) { 2082 ResourceMark rm; 2083 HandleMark hm; 2084 CMTask* task = _cm->task(worker_id); 2085 G1CMIsAliveClosure g1_is_alive(_g1h); 2086 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2087 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2088 2089 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2090 } 2091 }; 2092 2093 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2094 assert(_workers != NULL, "Need parallel worker threads."); 2095 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2096 2097 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2098 2099 // We need to reset the concurrency level before each 2100 // proxy task execution, so that the termination protocol 2101 // and overflow handling in CMTask::do_marking_step() knows 2102 // how many workers to wait for. 2103 _cm->set_concurrency(_active_workers); 2104 _workers->run_task(&proc_task_proxy); 2105 } 2106 2107 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2108 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2109 EnqueueTask& _enq_task; 2110 2111 public: 2112 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2113 AbstractGangTask("Enqueue reference objects in parallel"), 2114 _enq_task(enq_task) { } 2115 2116 virtual void work(uint worker_id) { 2117 _enq_task.work(worker_id); 2118 } 2119 }; 2120 2121 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2122 assert(_workers != NULL, "Need parallel worker threads."); 2123 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2124 2125 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2126 2127 // Not strictly necessary but... 2128 // 2129 // We need to reset the concurrency level before each 2130 // proxy task execution, so that the termination protocol 2131 // and overflow handling in CMTask::do_marking_step() knows 2132 // how many workers to wait for. 2133 _cm->set_concurrency(_active_workers); 2134 _workers->run_task(&enq_task_proxy); 2135 } 2136 2137 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2138 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2139 } 2140 2141 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2142 if (has_overflown()) { 2143 // Skip processing the discovered references if we have 2144 // overflown the global marking stack. Reference objects 2145 // only get discovered once so it is OK to not 2146 // de-populate the discovered reference lists. We could have, 2147 // but the only benefit would be that, when marking restarts, 2148 // less reference objects are discovered. 2149 return; 2150 } 2151 2152 ResourceMark rm; 2153 HandleMark hm; 2154 2155 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2156 2157 // Is alive closure. 2158 G1CMIsAliveClosure g1_is_alive(g1h); 2159 2160 // Inner scope to exclude the cleaning of the string and symbol 2161 // tables from the displayed time. 2162 { 2163 G1CMTraceTime t("GC ref-proc", G1Log::finer()); 2164 2165 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2166 2167 // See the comment in G1CollectedHeap::ref_processing_init() 2168 // about how reference processing currently works in G1. 2169 2170 // Set the soft reference policy 2171 rp->setup_policy(clear_all_soft_refs); 2172 assert(_markStack.isEmpty(), "mark stack should be empty"); 2173 2174 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2175 // in serial reference processing. Note these closures are also 2176 // used for serially processing (by the the current thread) the 2177 // JNI references during parallel reference processing. 2178 // 2179 // These closures do not need to synchronize with the worker 2180 // threads involved in parallel reference processing as these 2181 // instances are executed serially by the current thread (e.g. 2182 // reference processing is not multi-threaded and is thus 2183 // performed by the current thread instead of a gang worker). 2184 // 2185 // The gang tasks involved in parallel reference processing create 2186 // their own instances of these closures, which do their own 2187 // synchronization among themselves. 2188 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2189 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2190 2191 // We need at least one active thread. If reference processing 2192 // is not multi-threaded we use the current (VMThread) thread, 2193 // otherwise we use the work gang from the G1CollectedHeap and 2194 // we utilize all the worker threads we can. 2195 bool processing_is_mt = rp->processing_is_mt(); 2196 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2197 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2198 2199 // Parallel processing task executor. 2200 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2201 g1h->workers(), active_workers); 2202 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2203 2204 // Set the concurrency level. The phase was already set prior to 2205 // executing the remark task. 2206 set_concurrency(active_workers); 2207 2208 // Set the degree of MT processing here. If the discovery was done MT, 2209 // the number of threads involved during discovery could differ from 2210 // the number of active workers. This is OK as long as the discovered 2211 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2212 rp->set_active_mt_degree(active_workers); 2213 2214 // Process the weak references. 2215 const ReferenceProcessorStats& stats = 2216 rp->process_discovered_references(&g1_is_alive, 2217 &g1_keep_alive, 2218 &g1_drain_mark_stack, 2219 executor, 2220 g1h->gc_timer_cm()); 2221 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2222 2223 // The do_oop work routines of the keep_alive and drain_marking_stack 2224 // oop closures will set the has_overflown flag if we overflow the 2225 // global marking stack. 2226 2227 assert(_markStack.overflow() || _markStack.isEmpty(), 2228 "mark stack should be empty (unless it overflowed)"); 2229 2230 if (_markStack.overflow()) { 2231 // This should have been done already when we tried to push an 2232 // entry on to the global mark stack. But let's do it again. 2233 set_has_overflown(); 2234 } 2235 2236 assert(rp->num_q() == active_workers, "why not"); 2237 2238 rp->enqueue_discovered_references(executor); 2239 2240 rp->verify_no_references_recorded(); 2241 assert(!rp->discovery_enabled(), "Post condition"); 2242 } 2243 2244 if (has_overflown()) { 2245 // We can not trust g1_is_alive if the marking stack overflowed 2246 return; 2247 } 2248 2249 assert(_markStack.isEmpty(), "Marking should have completed"); 2250 2251 // Unload Klasses, String, Symbols, Code Cache, etc. 2252 { 2253 G1CMTraceTime trace("Unloading", G1Log::finer()); 2254 2255 if (ClassUnloadingWithConcurrentMark) { 2256 bool purged_classes; 2257 2258 { 2259 G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2260 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2261 } 2262 2263 { 2264 G1CMTraceTime trace("Parallel Unloading", G1Log::finest()); 2265 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2266 } 2267 } 2268 2269 if (G1StringDedup::is_enabled()) { 2270 G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2271 G1StringDedup::unlink(&g1_is_alive); 2272 } 2273 } 2274 } 2275 2276 void ConcurrentMark::swapMarkBitMaps() { 2277 CMBitMapRO* temp = _prevMarkBitMap; 2278 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2279 _nextMarkBitMap = (CMBitMap*) temp; 2280 } 2281 2282 // Closure for marking entries in SATB buffers. 2283 class CMSATBBufferClosure : public SATBBufferClosure { 2284 private: 2285 CMTask* _task; 2286 G1CollectedHeap* _g1h; 2287 2288 // This is very similar to CMTask::deal_with_reference, but with 2289 // more relaxed requirements for the argument, so this must be more 2290 // circumspect about treating the argument as an object. 2291 void do_entry(void* entry) const { 2292 _task->increment_refs_reached(); 2293 HeapRegion* hr = _g1h->heap_region_containing(entry); 2294 if (entry < hr->next_top_at_mark_start()) { 2295 // Until we get here, we don't know whether entry refers to a valid 2296 // object; it could instead have been a stale reference. 2297 oop obj = static_cast<oop>(entry); 2298 assert(obj->is_oop(true /* ignore mark word */), 2299 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)); 2300 _task->make_reference_grey(obj, hr); 2301 } 2302 } 2303 2304 public: 2305 CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h) 2306 : _task(task), _g1h(g1h) { } 2307 2308 virtual void do_buffer(void** buffer, size_t size) { 2309 for (size_t i = 0; i < size; ++i) { 2310 do_entry(buffer[i]); 2311 } 2312 } 2313 }; 2314 2315 class G1RemarkThreadsClosure : public ThreadClosure { 2316 CMSATBBufferClosure _cm_satb_cl; 2317 G1CMOopClosure _cm_cl; 2318 MarkingCodeBlobClosure _code_cl; 2319 int _thread_parity; 2320 2321 public: 2322 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) : 2323 _cm_satb_cl(task, g1h), 2324 _cm_cl(g1h, g1h->concurrent_mark(), task), 2325 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 2326 _thread_parity(Threads::thread_claim_parity()) {} 2327 2328 void do_thread(Thread* thread) { 2329 if (thread->is_Java_thread()) { 2330 if (thread->claim_oops_do(true, _thread_parity)) { 2331 JavaThread* jt = (JavaThread*)thread; 2332 2333 // In theory it should not be neccessary to explicitly walk the nmethods to find roots for concurrent marking 2334 // however the liveness of oops reachable from nmethods have very complex lifecycles: 2335 // * Alive if on the stack of an executing method 2336 // * Weakly reachable otherwise 2337 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be 2338 // live by the SATB invariant but other oops recorded in nmethods may behave differently. 2339 jt->nmethods_do(&_code_cl); 2340 2341 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 2342 } 2343 } else if (thread->is_VM_thread()) { 2344 if (thread->claim_oops_do(true, _thread_parity)) { 2345 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 2346 } 2347 } 2348 } 2349 }; 2350 2351 class CMRemarkTask: public AbstractGangTask { 2352 private: 2353 ConcurrentMark* _cm; 2354 public: 2355 void work(uint worker_id) { 2356 // Since all available tasks are actually started, we should 2357 // only proceed if we're supposed to be active. 2358 if (worker_id < _cm->active_tasks()) { 2359 CMTask* task = _cm->task(worker_id); 2360 task->record_start_time(); 2361 { 2362 ResourceMark rm; 2363 HandleMark hm; 2364 2365 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 2366 Threads::threads_do(&threads_f); 2367 } 2368 2369 do { 2370 task->do_marking_step(1000000000.0 /* something very large */, 2371 true /* do_termination */, 2372 false /* is_serial */); 2373 } while (task->has_aborted() && !_cm->has_overflown()); 2374 // If we overflow, then we do not want to restart. We instead 2375 // want to abort remark and do concurrent marking again. 2376 task->record_end_time(); 2377 } 2378 } 2379 2380 CMRemarkTask(ConcurrentMark* cm, uint active_workers) : 2381 AbstractGangTask("Par Remark"), _cm(cm) { 2382 _cm->terminator()->reset_for_reuse(active_workers); 2383 } 2384 }; 2385 2386 void ConcurrentMark::checkpointRootsFinalWork() { 2387 ResourceMark rm; 2388 HandleMark hm; 2389 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2390 2391 G1CMTraceTime trace("Finalize Marking", G1Log::finer()); 2392 2393 g1h->ensure_parsability(false); 2394 2395 // this is remark, so we'll use up all active threads 2396 uint active_workers = g1h->workers()->active_workers(); 2397 set_concurrency_and_phase(active_workers, false /* concurrent */); 2398 // Leave _parallel_marking_threads at it's 2399 // value originally calculated in the ConcurrentMark 2400 // constructor and pass values of the active workers 2401 // through the gang in the task. 2402 2403 { 2404 StrongRootsScope srs(active_workers); 2405 2406 CMRemarkTask remarkTask(this, active_workers); 2407 // We will start all available threads, even if we decide that the 2408 // active_workers will be fewer. The extra ones will just bail out 2409 // immediately. 2410 g1h->workers()->run_task(&remarkTask); 2411 } 2412 2413 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2414 guarantee(has_overflown() || 2415 satb_mq_set.completed_buffers_num() == 0, 2416 "Invariant: has_overflown = %s, num buffers = %d", 2417 BOOL_TO_STR(has_overflown()), 2418 satb_mq_set.completed_buffers_num()); 2419 2420 print_stats(); 2421 } 2422 2423 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2424 // Note we are overriding the read-only view of the prev map here, via 2425 // the cast. 2426 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2427 } 2428 2429 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2430 _nextMarkBitMap->clearRange(mr); 2431 } 2432 2433 HeapRegion* 2434 ConcurrentMark::claim_region(uint worker_id) { 2435 // "checkpoint" the finger 2436 HeapWord* finger = _finger; 2437 2438 // _heap_end will not change underneath our feet; it only changes at 2439 // yield points. 2440 while (finger < _heap_end) { 2441 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2442 2443 // Note on how this code handles humongous regions. In the 2444 // normal case the finger will reach the start of a "starts 2445 // humongous" (SH) region. Its end will either be the end of the 2446 // last "continues humongous" (CH) region in the sequence, or the 2447 // standard end of the SH region (if the SH is the only region in 2448 // the sequence). That way claim_region() will skip over the CH 2449 // regions. However, there is a subtle race between a CM thread 2450 // executing this method and a mutator thread doing a humongous 2451 // object allocation. The two are not mutually exclusive as the CM 2452 // thread does not need to hold the Heap_lock when it gets 2453 // here. So there is a chance that claim_region() will come across 2454 // a free region that's in the progress of becoming a SH or a CH 2455 // region. In the former case, it will either 2456 // a) Miss the update to the region's end, in which case it will 2457 // visit every subsequent CH region, will find their bitmaps 2458 // empty, and do nothing, or 2459 // b) Will observe the update of the region's end (in which case 2460 // it will skip the subsequent CH regions). 2461 // If it comes across a region that suddenly becomes CH, the 2462 // scenario will be similar to b). So, the race between 2463 // claim_region() and a humongous object allocation might force us 2464 // to do a bit of unnecessary work (due to some unnecessary bitmap 2465 // iterations) but it should not introduce and correctness issues. 2466 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 2467 2468 // Above heap_region_containing may return NULL as we always scan claim 2469 // until the end of the heap. In this case, just jump to the next region. 2470 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 2471 2472 // Is the gap between reading the finger and doing the CAS too long? 2473 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2474 if (res == finger && curr_region != NULL) { 2475 // we succeeded 2476 HeapWord* bottom = curr_region->bottom(); 2477 HeapWord* limit = curr_region->next_top_at_mark_start(); 2478 2479 // notice that _finger == end cannot be guaranteed here since, 2480 // someone else might have moved the finger even further 2481 assert(_finger >= end, "the finger should have moved forward"); 2482 2483 if (limit > bottom) { 2484 return curr_region; 2485 } else { 2486 assert(limit == bottom, 2487 "the region limit should be at bottom"); 2488 // we return NULL and the caller should try calling 2489 // claim_region() again. 2490 return NULL; 2491 } 2492 } else { 2493 assert(_finger > finger, "the finger should have moved forward"); 2494 // read it again 2495 finger = _finger; 2496 } 2497 } 2498 2499 return NULL; 2500 } 2501 2502 #ifndef PRODUCT 2503 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2504 private: 2505 G1CollectedHeap* _g1h; 2506 const char* _phase; 2507 int _info; 2508 2509 public: 2510 VerifyNoCSetOops(const char* phase, int info = -1) : 2511 _g1h(G1CollectedHeap::heap()), 2512 _phase(phase), 2513 _info(info) 2514 { } 2515 2516 void operator()(oop obj) const { 2517 guarantee(obj->is_oop(), 2518 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2519 p2i(obj), _phase, _info); 2520 guarantee(!_g1h->obj_in_cs(obj), 2521 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2522 p2i(obj), _phase, _info); 2523 } 2524 }; 2525 2526 void ConcurrentMark::verify_no_cset_oops() { 2527 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2528 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2529 return; 2530 } 2531 2532 // Verify entries on the global mark stack 2533 _markStack.iterate(VerifyNoCSetOops("Stack")); 2534 2535 // Verify entries on the task queues 2536 for (uint i = 0; i < _max_worker_id; ++i) { 2537 CMTaskQueue* queue = _task_queues->queue(i); 2538 queue->iterate(VerifyNoCSetOops("Queue", i)); 2539 } 2540 2541 // Verify the global finger 2542 HeapWord* global_finger = finger(); 2543 if (global_finger != NULL && global_finger < _heap_end) { 2544 // The global finger always points to a heap region boundary. We 2545 // use heap_region_containing() to get the containing region 2546 // given that the global finger could be pointing to a free region 2547 // which subsequently becomes continues humongous. If that 2548 // happens, heap_region_containing() will return the bottom of the 2549 // corresponding starts humongous region and the check below will 2550 // not hold any more. 2551 // Since we always iterate over all regions, we might get a NULL HeapRegion 2552 // here. 2553 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2554 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2555 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2556 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2557 } 2558 2559 // Verify the task fingers 2560 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2561 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2562 CMTask* task = _tasks[i]; 2563 HeapWord* task_finger = task->finger(); 2564 if (task_finger != NULL && task_finger < _heap_end) { 2565 // See above note on the global finger verification. 2566 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2567 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2568 !task_hr->in_collection_set(), 2569 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2570 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2571 } 2572 } 2573 } 2574 #endif // PRODUCT 2575 2576 // Aggregate the counting data that was constructed concurrently 2577 // with marking. 2578 class AggregateCountDataHRClosure: public HeapRegionClosure { 2579 G1CollectedHeap* _g1h; 2580 ConcurrentMark* _cm; 2581 CardTableModRefBS* _ct_bs; 2582 BitMap* _cm_card_bm; 2583 uint _max_worker_id; 2584 2585 public: 2586 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2587 BitMap* cm_card_bm, 2588 uint max_worker_id) : 2589 _g1h(g1h), _cm(g1h->concurrent_mark()), 2590 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())), 2591 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 2592 2593 bool doHeapRegion(HeapRegion* hr) { 2594 HeapWord* start = hr->bottom(); 2595 HeapWord* limit = hr->next_top_at_mark_start(); 2596 HeapWord* end = hr->end(); 2597 2598 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2599 "Preconditions not met - " 2600 "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", " 2601 "top: " PTR_FORMAT ", end: " PTR_FORMAT, 2602 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())); 2603 2604 assert(hr->next_marked_bytes() == 0, "Precondition"); 2605 2606 if (start == limit) { 2607 // NTAMS of this region has not been set so nothing to do. 2608 return false; 2609 } 2610 2611 // 'start' should be in the heap. 2612 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 2613 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 2614 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 2615 2616 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 2617 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 2618 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 2619 2620 // If ntams is not card aligned then we bump card bitmap index 2621 // for limit so that we get the all the cards spanned by 2622 // the object ending at ntams. 2623 // Note: if this is the last region in the heap then ntams 2624 // could be actually just beyond the end of the the heap; 2625 // limit_idx will then correspond to a (non-existent) card 2626 // that is also outside the heap. 2627 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 2628 limit_idx += 1; 2629 } 2630 2631 assert(limit_idx <= end_idx, "or else use atomics"); 2632 2633 // Aggregate the "stripe" in the count data associated with hr. 2634 uint hrm_index = hr->hrm_index(); 2635 size_t marked_bytes = 0; 2636 2637 for (uint i = 0; i < _max_worker_id; i += 1) { 2638 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 2639 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 2640 2641 // Fetch the marked_bytes in this region for task i and 2642 // add it to the running total for this region. 2643 marked_bytes += marked_bytes_array[hrm_index]; 2644 2645 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 2646 // into the global card bitmap. 2647 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 2648 2649 while (scan_idx < limit_idx) { 2650 assert(task_card_bm->at(scan_idx) == true, "should be"); 2651 _cm_card_bm->set_bit(scan_idx); 2652 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 2653 2654 // BitMap::get_next_one_offset() can handle the case when 2655 // its left_offset parameter is greater than its right_offset 2656 // parameter. It does, however, have an early exit if 2657 // left_offset == right_offset. So let's limit the value 2658 // passed in for left offset here. 2659 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2660 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2661 } 2662 } 2663 2664 // Update the marked bytes for this region. 2665 hr->add_to_marked_bytes(marked_bytes); 2666 2667 // Next heap region 2668 return false; 2669 } 2670 }; 2671 2672 class G1AggregateCountDataTask: public AbstractGangTask { 2673 protected: 2674 G1CollectedHeap* _g1h; 2675 ConcurrentMark* _cm; 2676 BitMap* _cm_card_bm; 2677 uint _max_worker_id; 2678 uint _active_workers; 2679 HeapRegionClaimer _hrclaimer; 2680 2681 public: 2682 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2683 ConcurrentMark* cm, 2684 BitMap* cm_card_bm, 2685 uint max_worker_id, 2686 uint n_workers) : 2687 AbstractGangTask("Count Aggregation"), 2688 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2689 _max_worker_id(max_worker_id), 2690 _active_workers(n_workers), 2691 _hrclaimer(_active_workers) { 2692 } 2693 2694 void work(uint worker_id) { 2695 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2696 2697 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2698 } 2699 }; 2700 2701 2702 void ConcurrentMark::aggregate_count_data() { 2703 uint n_workers = _g1h->workers()->active_workers(); 2704 2705 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2706 _max_worker_id, n_workers); 2707 2708 _g1h->workers()->run_task(&g1_par_agg_task); 2709 } 2710 2711 // Clear the per-worker arrays used to store the per-region counting data 2712 void ConcurrentMark::clear_all_count_data() { 2713 // Clear the global card bitmap - it will be filled during 2714 // liveness count aggregation (during remark) and the 2715 // final counting task. 2716 _card_bm.clear(); 2717 2718 // Clear the global region bitmap - it will be filled as part 2719 // of the final counting task. 2720 _region_bm.clear(); 2721 2722 uint max_regions = _g1h->max_regions(); 2723 assert(_max_worker_id > 0, "uninitialized"); 2724 2725 for (uint i = 0; i < _max_worker_id; i += 1) { 2726 BitMap* task_card_bm = count_card_bitmap_for(i); 2727 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2728 2729 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2730 assert(marked_bytes_array != NULL, "uninitialized"); 2731 2732 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2733 task_card_bm->clear(); 2734 } 2735 } 2736 2737 void ConcurrentMark::print_stats() { 2738 if (G1MarkingVerboseLevel > 0) { 2739 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2740 for (size_t i = 0; i < _active_tasks; ++i) { 2741 _tasks[i]->print_stats(); 2742 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2743 } 2744 } 2745 } 2746 2747 // abandon current marking iteration due to a Full GC 2748 void ConcurrentMark::abort() { 2749 if (!cmThread()->during_cycle() || _has_aborted) { 2750 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2751 return; 2752 } 2753 2754 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2755 // concurrent bitmap clearing. 2756 _nextMarkBitMap->clearAll(); 2757 2758 // Note we cannot clear the previous marking bitmap here 2759 // since VerifyDuringGC verifies the objects marked during 2760 // a full GC against the previous bitmap. 2761 2762 // Clear the liveness counting data 2763 clear_all_count_data(); 2764 // Empty mark stack 2765 reset_marking_state(); 2766 for (uint i = 0; i < _max_worker_id; ++i) { 2767 _tasks[i]->clear_region_fields(); 2768 } 2769 _first_overflow_barrier_sync.abort(); 2770 _second_overflow_barrier_sync.abort(); 2771 _has_aborted = true; 2772 2773 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2774 satb_mq_set.abandon_partial_marking(); 2775 // This can be called either during or outside marking, we'll read 2776 // the expected_active value from the SATB queue set. 2777 satb_mq_set.set_active_all_threads( 2778 false, /* new active value */ 2779 satb_mq_set.is_active() /* expected_active */); 2780 2781 _g1h->trace_heap_after_concurrent_cycle(); 2782 _g1h->register_concurrent_cycle_end(); 2783 } 2784 2785 static void print_ms_time_info(const char* prefix, const char* name, 2786 NumberSeq& ns) { 2787 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2788 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2789 if (ns.num() > 0) { 2790 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2791 prefix, ns.sd(), ns.maximum()); 2792 } 2793 } 2794 2795 void ConcurrentMark::print_summary_info() { 2796 gclog_or_tty->print_cr(" Concurrent marking:"); 2797 print_ms_time_info(" ", "init marks", _init_times); 2798 print_ms_time_info(" ", "remarks", _remark_times); 2799 { 2800 print_ms_time_info(" ", "final marks", _remark_mark_times); 2801 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2802 2803 } 2804 print_ms_time_info(" ", "cleanups", _cleanup_times); 2805 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2806 _total_counting_time, 2807 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 2808 (double)_cleanup_times.num() 2809 : 0.0)); 2810 if (G1ScrubRemSets) { 2811 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2812 _total_rs_scrub_time, 2813 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 2814 (double)_cleanup_times.num() 2815 : 0.0)); 2816 } 2817 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 2818 (_init_times.sum() + _remark_times.sum() + 2819 _cleanup_times.sum())/1000.0); 2820 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 2821 "(%8.2f s marking).", 2822 cmThread()->vtime_accum(), 2823 cmThread()->vtime_mark_accum()); 2824 } 2825 2826 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2827 _parallel_workers->print_worker_threads_on(st); 2828 } 2829 2830 void ConcurrentMark::print_on_error(outputStream* st) const { 2831 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2832 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2833 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2834 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2835 } 2836 2837 // We take a break if someone is trying to stop the world. 2838 bool ConcurrentMark::do_yield_check(uint worker_id) { 2839 if (SuspendibleThreadSet::should_yield()) { 2840 if (worker_id == 0) { 2841 _g1h->g1_policy()->record_concurrent_pause(); 2842 } 2843 SuspendibleThreadSet::yield(); 2844 return true; 2845 } else { 2846 return false; 2847 } 2848 } 2849 2850 // Closure for iteration over bitmaps 2851 class CMBitMapClosure : public BitMapClosure { 2852 private: 2853 // the bitmap that is being iterated over 2854 CMBitMap* _nextMarkBitMap; 2855 ConcurrentMark* _cm; 2856 CMTask* _task; 2857 2858 public: 2859 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 2860 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2861 2862 bool do_bit(size_t offset) { 2863 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2864 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2865 assert( addr < _cm->finger(), "invariant"); 2866 assert(addr >= _task->finger(), "invariant"); 2867 2868 // We move that task's local finger along. 2869 _task->move_finger_to(addr); 2870 2871 _task->scan_object(oop(addr)); 2872 // we only partially drain the local queue and global stack 2873 _task->drain_local_queue(true); 2874 _task->drain_global_stack(true); 2875 2876 // if the has_aborted flag has been raised, we need to bail out of 2877 // the iteration 2878 return !_task->has_aborted(); 2879 } 2880 }; 2881 2882 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2883 ReferenceProcessor* result = NULL; 2884 if (G1UseConcMarkReferenceProcessing) { 2885 result = g1h->ref_processor_cm(); 2886 assert(result != NULL, "should not be NULL"); 2887 } 2888 return result; 2889 } 2890 2891 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2892 ConcurrentMark* cm, 2893 CMTask* task) 2894 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2895 _g1h(g1h), _cm(cm), _task(task) 2896 { } 2897 2898 void CMTask::setup_for_region(HeapRegion* hr) { 2899 assert(hr != NULL, 2900 "claim_region() should have filtered out NULL regions"); 2901 _curr_region = hr; 2902 _finger = hr->bottom(); 2903 update_region_limit(); 2904 } 2905 2906 void CMTask::update_region_limit() { 2907 HeapRegion* hr = _curr_region; 2908 HeapWord* bottom = hr->bottom(); 2909 HeapWord* limit = hr->next_top_at_mark_start(); 2910 2911 if (limit == bottom) { 2912 // The region was collected underneath our feet. 2913 // We set the finger to bottom to ensure that the bitmap 2914 // iteration that will follow this will not do anything. 2915 // (this is not a condition that holds when we set the region up, 2916 // as the region is not supposed to be empty in the first place) 2917 _finger = bottom; 2918 } else if (limit >= _region_limit) { 2919 assert(limit >= _finger, "peace of mind"); 2920 } else { 2921 assert(limit < _region_limit, "only way to get here"); 2922 // This can happen under some pretty unusual circumstances. An 2923 // evacuation pause empties the region underneath our feet (NTAMS 2924 // at bottom). We then do some allocation in the region (NTAMS 2925 // stays at bottom), followed by the region being used as a GC 2926 // alloc region (NTAMS will move to top() and the objects 2927 // originally below it will be grayed). All objects now marked in 2928 // the region are explicitly grayed, if below the global finger, 2929 // and we do not need in fact to scan anything else. So, we simply 2930 // set _finger to be limit to ensure that the bitmap iteration 2931 // doesn't do anything. 2932 _finger = limit; 2933 } 2934 2935 _region_limit = limit; 2936 } 2937 2938 void CMTask::giveup_current_region() { 2939 assert(_curr_region != NULL, "invariant"); 2940 clear_region_fields(); 2941 } 2942 2943 void CMTask::clear_region_fields() { 2944 // Values for these three fields that indicate that we're not 2945 // holding on to a region. 2946 _curr_region = NULL; 2947 _finger = NULL; 2948 _region_limit = NULL; 2949 } 2950 2951 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 2952 if (cm_oop_closure == NULL) { 2953 assert(_cm_oop_closure != NULL, "invariant"); 2954 } else { 2955 assert(_cm_oop_closure == NULL, "invariant"); 2956 } 2957 _cm_oop_closure = cm_oop_closure; 2958 } 2959 2960 void CMTask::reset(CMBitMap* nextMarkBitMap) { 2961 guarantee(nextMarkBitMap != NULL, "invariant"); 2962 _nextMarkBitMap = nextMarkBitMap; 2963 clear_region_fields(); 2964 2965 _calls = 0; 2966 _elapsed_time_ms = 0.0; 2967 _termination_time_ms = 0.0; 2968 _termination_start_time_ms = 0.0; 2969 } 2970 2971 bool CMTask::should_exit_termination() { 2972 regular_clock_call(); 2973 // This is called when we are in the termination protocol. We should 2974 // quit if, for some reason, this task wants to abort or the global 2975 // stack is not empty (this means that we can get work from it). 2976 return !_cm->mark_stack_empty() || has_aborted(); 2977 } 2978 2979 void CMTask::reached_limit() { 2980 assert(_words_scanned >= _words_scanned_limit || 2981 _refs_reached >= _refs_reached_limit , 2982 "shouldn't have been called otherwise"); 2983 regular_clock_call(); 2984 } 2985 2986 void CMTask::regular_clock_call() { 2987 if (has_aborted()) return; 2988 2989 // First, we need to recalculate the words scanned and refs reached 2990 // limits for the next clock call. 2991 recalculate_limits(); 2992 2993 // During the regular clock call we do the following 2994 2995 // (1) If an overflow has been flagged, then we abort. 2996 if (_cm->has_overflown()) { 2997 set_has_aborted(); 2998 return; 2999 } 3000 3001 // If we are not concurrent (i.e. we're doing remark) we don't need 3002 // to check anything else. The other steps are only needed during 3003 // the concurrent marking phase. 3004 if (!concurrent()) return; 3005 3006 // (2) If marking has been aborted for Full GC, then we also abort. 3007 if (_cm->has_aborted()) { 3008 set_has_aborted(); 3009 return; 3010 } 3011 3012 double curr_time_ms = os::elapsedVTime() * 1000.0; 3013 3014 // (4) We check whether we should yield. If we have to, then we abort. 3015 if (SuspendibleThreadSet::should_yield()) { 3016 // We should yield. To do this we abort the task. The caller is 3017 // responsible for yielding. 3018 set_has_aborted(); 3019 return; 3020 } 3021 3022 // (5) We check whether we've reached our time quota. If we have, 3023 // then we abort. 3024 double elapsed_time_ms = curr_time_ms - _start_time_ms; 3025 if (elapsed_time_ms > _time_target_ms) { 3026 set_has_aborted(); 3027 _has_timed_out = true; 3028 return; 3029 } 3030 3031 // (6) Finally, we check whether there are enough completed STAB 3032 // buffers available for processing. If there are, we abort. 3033 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3034 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3035 // we do need to process SATB buffers, we'll abort and restart 3036 // the marking task to do so 3037 set_has_aborted(); 3038 return; 3039 } 3040 } 3041 3042 void CMTask::recalculate_limits() { 3043 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3044 _words_scanned_limit = _real_words_scanned_limit; 3045 3046 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3047 _refs_reached_limit = _real_refs_reached_limit; 3048 } 3049 3050 void CMTask::decrease_limits() { 3051 // This is called when we believe that we're going to do an infrequent 3052 // operation which will increase the per byte scanned cost (i.e. move 3053 // entries to/from the global stack). It basically tries to decrease the 3054 // scanning limit so that the clock is called earlier. 3055 3056 _words_scanned_limit = _real_words_scanned_limit - 3057 3 * words_scanned_period / 4; 3058 _refs_reached_limit = _real_refs_reached_limit - 3059 3 * refs_reached_period / 4; 3060 } 3061 3062 void CMTask::move_entries_to_global_stack() { 3063 // local array where we'll store the entries that will be popped 3064 // from the local queue 3065 oop buffer[global_stack_transfer_size]; 3066 3067 int n = 0; 3068 oop obj; 3069 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3070 buffer[n] = obj; 3071 ++n; 3072 } 3073 3074 if (n > 0) { 3075 // we popped at least one entry from the local queue 3076 3077 if (!_cm->mark_stack_push(buffer, n)) { 3078 set_has_aborted(); 3079 } 3080 } 3081 3082 // this operation was quite expensive, so decrease the limits 3083 decrease_limits(); 3084 } 3085 3086 void CMTask::get_entries_from_global_stack() { 3087 // local array where we'll store the entries that will be popped 3088 // from the global stack. 3089 oop buffer[global_stack_transfer_size]; 3090 int n; 3091 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3092 assert(n <= global_stack_transfer_size, 3093 "we should not pop more than the given limit"); 3094 if (n > 0) { 3095 // yes, we did actually pop at least one entry 3096 for (int i = 0; i < n; ++i) { 3097 bool success = _task_queue->push(buffer[i]); 3098 // We only call this when the local queue is empty or under a 3099 // given target limit. So, we do not expect this push to fail. 3100 assert(success, "invariant"); 3101 } 3102 } 3103 3104 // this operation was quite expensive, so decrease the limits 3105 decrease_limits(); 3106 } 3107 3108 void CMTask::drain_local_queue(bool partially) { 3109 if (has_aborted()) return; 3110 3111 // Decide what the target size is, depending whether we're going to 3112 // drain it partially (so that other tasks can steal if they run out 3113 // of things to do) or totally (at the very end). 3114 size_t target_size; 3115 if (partially) { 3116 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3117 } else { 3118 target_size = 0; 3119 } 3120 3121 if (_task_queue->size() > target_size) { 3122 oop obj; 3123 bool ret = _task_queue->pop_local(obj); 3124 while (ret) { 3125 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 3126 assert(!_g1h->is_on_master_free_list( 3127 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3128 3129 scan_object(obj); 3130 3131 if (_task_queue->size() <= target_size || has_aborted()) { 3132 ret = false; 3133 } else { 3134 ret = _task_queue->pop_local(obj); 3135 } 3136 } 3137 } 3138 } 3139 3140 void CMTask::drain_global_stack(bool partially) { 3141 if (has_aborted()) return; 3142 3143 // We have a policy to drain the local queue before we attempt to 3144 // drain the global stack. 3145 assert(partially || _task_queue->size() == 0, "invariant"); 3146 3147 // Decide what the target size is, depending whether we're going to 3148 // drain it partially (so that other tasks can steal if they run out 3149 // of things to do) or totally (at the very end). Notice that, 3150 // because we move entries from the global stack in chunks or 3151 // because another task might be doing the same, we might in fact 3152 // drop below the target. But, this is not a problem. 3153 size_t target_size; 3154 if (partially) { 3155 target_size = _cm->partial_mark_stack_size_target(); 3156 } else { 3157 target_size = 0; 3158 } 3159 3160 if (_cm->mark_stack_size() > target_size) { 3161 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3162 get_entries_from_global_stack(); 3163 drain_local_queue(partially); 3164 } 3165 } 3166 } 3167 3168 // SATB Queue has several assumptions on whether to call the par or 3169 // non-par versions of the methods. this is why some of the code is 3170 // replicated. We should really get rid of the single-threaded version 3171 // of the code to simplify things. 3172 void CMTask::drain_satb_buffers() { 3173 if (has_aborted()) return; 3174 3175 // We set this so that the regular clock knows that we're in the 3176 // middle of draining buffers and doesn't set the abort flag when it 3177 // notices that SATB buffers are available for draining. It'd be 3178 // very counter productive if it did that. :-) 3179 _draining_satb_buffers = true; 3180 3181 CMSATBBufferClosure satb_cl(this, _g1h); 3182 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3183 3184 // This keeps claiming and applying the closure to completed buffers 3185 // until we run out of buffers or we need to abort. 3186 while (!has_aborted() && 3187 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 3188 regular_clock_call(); 3189 } 3190 3191 _draining_satb_buffers = false; 3192 3193 assert(has_aborted() || 3194 concurrent() || 3195 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3196 3197 // again, this was a potentially expensive operation, decrease the 3198 // limits to get the regular clock call early 3199 decrease_limits(); 3200 } 3201 3202 void CMTask::print_stats() { 3203 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3204 _worker_id, _calls); 3205 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3206 _elapsed_time_ms, _termination_time_ms); 3207 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3208 _step_times_ms.num(), _step_times_ms.avg(), 3209 _step_times_ms.sd()); 3210 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3211 _step_times_ms.maximum(), _step_times_ms.sum()); 3212 } 3213 3214 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) { 3215 return _task_queues->steal(worker_id, hash_seed, obj); 3216 } 3217 3218 /***************************************************************************** 3219 3220 The do_marking_step(time_target_ms, ...) method is the building 3221 block of the parallel marking framework. It can be called in parallel 3222 with other invocations of do_marking_step() on different tasks 3223 (but only one per task, obviously) and concurrently with the 3224 mutator threads, or during remark, hence it eliminates the need 3225 for two versions of the code. When called during remark, it will 3226 pick up from where the task left off during the concurrent marking 3227 phase. Interestingly, tasks are also claimable during evacuation 3228 pauses too, since do_marking_step() ensures that it aborts before 3229 it needs to yield. 3230 3231 The data structures that it uses to do marking work are the 3232 following: 3233 3234 (1) Marking Bitmap. If there are gray objects that appear only 3235 on the bitmap (this happens either when dealing with an overflow 3236 or when the initial marking phase has simply marked the roots 3237 and didn't push them on the stack), then tasks claim heap 3238 regions whose bitmap they then scan to find gray objects. A 3239 global finger indicates where the end of the last claimed region 3240 is. A local finger indicates how far into the region a task has 3241 scanned. The two fingers are used to determine how to gray an 3242 object (i.e. whether simply marking it is OK, as it will be 3243 visited by a task in the future, or whether it needs to be also 3244 pushed on a stack). 3245 3246 (2) Local Queue. The local queue of the task which is accessed 3247 reasonably efficiently by the task. Other tasks can steal from 3248 it when they run out of work. Throughout the marking phase, a 3249 task attempts to keep its local queue short but not totally 3250 empty, so that entries are available for stealing by other 3251 tasks. Only when there is no more work, a task will totally 3252 drain its local queue. 3253 3254 (3) Global Mark Stack. This handles local queue overflow. During 3255 marking only sets of entries are moved between it and the local 3256 queues, as access to it requires a mutex and more fine-grain 3257 interaction with it which might cause contention. If it 3258 overflows, then the marking phase should restart and iterate 3259 over the bitmap to identify gray objects. Throughout the marking 3260 phase, tasks attempt to keep the global mark stack at a small 3261 length but not totally empty, so that entries are available for 3262 popping by other tasks. Only when there is no more work, tasks 3263 will totally drain the global mark stack. 3264 3265 (4) SATB Buffer Queue. This is where completed SATB buffers are 3266 made available. Buffers are regularly removed from this queue 3267 and scanned for roots, so that the queue doesn't get too 3268 long. During remark, all completed buffers are processed, as 3269 well as the filled in parts of any uncompleted buffers. 3270 3271 The do_marking_step() method tries to abort when the time target 3272 has been reached. There are a few other cases when the 3273 do_marking_step() method also aborts: 3274 3275 (1) When the marking phase has been aborted (after a Full GC). 3276 3277 (2) When a global overflow (on the global stack) has been 3278 triggered. Before the task aborts, it will actually sync up with 3279 the other tasks to ensure that all the marking data structures 3280 (local queues, stacks, fingers etc.) are re-initialized so that 3281 when do_marking_step() completes, the marking phase can 3282 immediately restart. 3283 3284 (3) When enough completed SATB buffers are available. The 3285 do_marking_step() method only tries to drain SATB buffers right 3286 at the beginning. So, if enough buffers are available, the 3287 marking step aborts and the SATB buffers are processed at 3288 the beginning of the next invocation. 3289 3290 (4) To yield. when we have to yield then we abort and yield 3291 right at the end of do_marking_step(). This saves us from a lot 3292 of hassle as, by yielding we might allow a Full GC. If this 3293 happens then objects will be compacted underneath our feet, the 3294 heap might shrink, etc. We save checking for this by just 3295 aborting and doing the yield right at the end. 3296 3297 From the above it follows that the do_marking_step() method should 3298 be called in a loop (or, otherwise, regularly) until it completes. 3299 3300 If a marking step completes without its has_aborted() flag being 3301 true, it means it has completed the current marking phase (and 3302 also all other marking tasks have done so and have all synced up). 3303 3304 A method called regular_clock_call() is invoked "regularly" (in 3305 sub ms intervals) throughout marking. It is this clock method that 3306 checks all the abort conditions which were mentioned above and 3307 decides when the task should abort. A work-based scheme is used to 3308 trigger this clock method: when the number of object words the 3309 marking phase has scanned or the number of references the marking 3310 phase has visited reach a given limit. Additional invocations to 3311 the method clock have been planted in a few other strategic places 3312 too. The initial reason for the clock method was to avoid calling 3313 vtime too regularly, as it is quite expensive. So, once it was in 3314 place, it was natural to piggy-back all the other conditions on it 3315 too and not constantly check them throughout the code. 3316 3317 If do_termination is true then do_marking_step will enter its 3318 termination protocol. 3319 3320 The value of is_serial must be true when do_marking_step is being 3321 called serially (i.e. by the VMThread) and do_marking_step should 3322 skip any synchronization in the termination and overflow code. 3323 Examples include the serial remark code and the serial reference 3324 processing closures. 3325 3326 The value of is_serial must be false when do_marking_step is 3327 being called by any of the worker threads in a work gang. 3328 Examples include the concurrent marking code (CMMarkingTask), 3329 the MT remark code, and the MT reference processing closures. 3330 3331 *****************************************************************************/ 3332 3333 void CMTask::do_marking_step(double time_target_ms, 3334 bool do_termination, 3335 bool is_serial) { 3336 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 3337 assert(concurrent() == _cm->concurrent(), "they should be the same"); 3338 3339 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 3340 assert(_task_queues != NULL, "invariant"); 3341 assert(_task_queue != NULL, "invariant"); 3342 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant"); 3343 3344 assert(!_claimed, 3345 "only one thread should claim this task at any one time"); 3346 3347 // OK, this doesn't safeguard again all possible scenarios, as it is 3348 // possible for two threads to set the _claimed flag at the same 3349 // time. But it is only for debugging purposes anyway and it will 3350 // catch most problems. 3351 _claimed = true; 3352 3353 _start_time_ms = os::elapsedVTime() * 1000.0; 3354 3355 // If do_stealing is true then do_marking_step will attempt to 3356 // steal work from the other CMTasks. It only makes sense to 3357 // enable stealing when the termination protocol is enabled 3358 // and do_marking_step() is not being called serially. 3359 bool do_stealing = do_termination && !is_serial; 3360 3361 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 3362 _time_target_ms = time_target_ms - diff_prediction_ms; 3363 3364 // set up the variables that are used in the work-based scheme to 3365 // call the regular clock method 3366 _words_scanned = 0; 3367 _refs_reached = 0; 3368 recalculate_limits(); 3369 3370 // clear all flags 3371 clear_has_aborted(); 3372 _has_timed_out = false; 3373 _draining_satb_buffers = false; 3374 3375 ++_calls; 3376 3377 // Set up the bitmap and oop closures. Anything that uses them is 3378 // eventually called from this method, so it is OK to allocate these 3379 // statically. 3380 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 3381 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 3382 set_cm_oop_closure(&cm_oop_closure); 3383 3384 if (_cm->has_overflown()) { 3385 // This can happen if the mark stack overflows during a GC pause 3386 // and this task, after a yield point, restarts. We have to abort 3387 // as we need to get into the overflow protocol which happens 3388 // right at the end of this task. 3389 set_has_aborted(); 3390 } 3391 3392 // First drain any available SATB buffers. After this, we will not 3393 // look at SATB buffers before the next invocation of this method. 3394 // If enough completed SATB buffers are queued up, the regular clock 3395 // will abort this task so that it restarts. 3396 drain_satb_buffers(); 3397 // ...then partially drain the local queue and the global stack 3398 drain_local_queue(true); 3399 drain_global_stack(true); 3400 3401 do { 3402 if (!has_aborted() && _curr_region != NULL) { 3403 // This means that we're already holding on to a region. 3404 assert(_finger != NULL, "if region is not NULL, then the finger " 3405 "should not be NULL either"); 3406 3407 // We might have restarted this task after an evacuation pause 3408 // which might have evacuated the region we're holding on to 3409 // underneath our feet. Let's read its limit again to make sure 3410 // that we do not iterate over a region of the heap that 3411 // contains garbage (update_region_limit() will also move 3412 // _finger to the start of the region if it is found empty). 3413 update_region_limit(); 3414 // We will start from _finger not from the start of the region, 3415 // as we might be restarting this task after aborting half-way 3416 // through scanning this region. In this case, _finger points to 3417 // the address where we last found a marked object. If this is a 3418 // fresh region, _finger points to start(). 3419 MemRegion mr = MemRegion(_finger, _region_limit); 3420 3421 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 3422 "humongous regions should go around loop once only"); 3423 3424 // Some special cases: 3425 // If the memory region is empty, we can just give up the region. 3426 // If the current region is humongous then we only need to check 3427 // the bitmap for the bit associated with the start of the object, 3428 // scan the object if it's live, and give up the region. 3429 // Otherwise, let's iterate over the bitmap of the part of the region 3430 // that is left. 3431 // If the iteration is successful, give up the region. 3432 if (mr.is_empty()) { 3433 giveup_current_region(); 3434 regular_clock_call(); 3435 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 3436 if (_nextMarkBitMap->isMarked(mr.start())) { 3437 // The object is marked - apply the closure 3438 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 3439 bitmap_closure.do_bit(offset); 3440 } 3441 // Even if this task aborted while scanning the humongous object 3442 // we can (and should) give up the current region. 3443 giveup_current_region(); 3444 regular_clock_call(); 3445 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 3446 giveup_current_region(); 3447 regular_clock_call(); 3448 } else { 3449 assert(has_aborted(), "currently the only way to do so"); 3450 // The only way to abort the bitmap iteration is to return 3451 // false from the do_bit() method. However, inside the 3452 // do_bit() method we move the _finger to point to the 3453 // object currently being looked at. So, if we bail out, we 3454 // have definitely set _finger to something non-null. 3455 assert(_finger != NULL, "invariant"); 3456 3457 // Region iteration was actually aborted. So now _finger 3458 // points to the address of the object we last scanned. If we 3459 // leave it there, when we restart this task, we will rescan 3460 // the object. It is easy to avoid this. We move the finger by 3461 // enough to point to the next possible object header (the 3462 // bitmap knows by how much we need to move it as it knows its 3463 // granularity). 3464 assert(_finger < _region_limit, "invariant"); 3465 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 3466 // Check if bitmap iteration was aborted while scanning the last object 3467 if (new_finger >= _region_limit) { 3468 giveup_current_region(); 3469 } else { 3470 move_finger_to(new_finger); 3471 } 3472 } 3473 } 3474 // At this point we have either completed iterating over the 3475 // region we were holding on to, or we have aborted. 3476 3477 // We then partially drain the local queue and the global stack. 3478 // (Do we really need this?) 3479 drain_local_queue(true); 3480 drain_global_stack(true); 3481 3482 // Read the note on the claim_region() method on why it might 3483 // return NULL with potentially more regions available for 3484 // claiming and why we have to check out_of_regions() to determine 3485 // whether we're done or not. 3486 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 3487 // We are going to try to claim a new region. We should have 3488 // given up on the previous one. 3489 // Separated the asserts so that we know which one fires. 3490 assert(_curr_region == NULL, "invariant"); 3491 assert(_finger == NULL, "invariant"); 3492 assert(_region_limit == NULL, "invariant"); 3493 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 3494 if (claimed_region != NULL) { 3495 // Yes, we managed to claim one 3496 setup_for_region(claimed_region); 3497 assert(_curr_region == claimed_region, "invariant"); 3498 } 3499 // It is important to call the regular clock here. It might take 3500 // a while to claim a region if, for example, we hit a large 3501 // block of empty regions. So we need to call the regular clock 3502 // method once round the loop to make sure it's called 3503 // frequently enough. 3504 regular_clock_call(); 3505 } 3506 3507 if (!has_aborted() && _curr_region == NULL) { 3508 assert(_cm->out_of_regions(), 3509 "at this point we should be out of regions"); 3510 } 3511 } while ( _curr_region != NULL && !has_aborted()); 3512 3513 if (!has_aborted()) { 3514 // We cannot check whether the global stack is empty, since other 3515 // tasks might be pushing objects to it concurrently. 3516 assert(_cm->out_of_regions(), 3517 "at this point we should be out of regions"); 3518 // Try to reduce the number of available SATB buffers so that 3519 // remark has less work to do. 3520 drain_satb_buffers(); 3521 } 3522 3523 // Since we've done everything else, we can now totally drain the 3524 // local queue and global stack. 3525 drain_local_queue(false); 3526 drain_global_stack(false); 3527 3528 // Attempt at work stealing from other task's queues. 3529 if (do_stealing && !has_aborted()) { 3530 // We have not aborted. This means that we have finished all that 3531 // we could. Let's try to do some stealing... 3532 3533 // We cannot check whether the global stack is empty, since other 3534 // tasks might be pushing objects to it concurrently. 3535 assert(_cm->out_of_regions() && _task_queue->size() == 0, 3536 "only way to reach here"); 3537 while (!has_aborted()) { 3538 oop obj; 3539 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 3540 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 3541 "any stolen object should be marked"); 3542 scan_object(obj); 3543 3544 // And since we're towards the end, let's totally drain the 3545 // local queue and global stack. 3546 drain_local_queue(false); 3547 drain_global_stack(false); 3548 } else { 3549 break; 3550 } 3551 } 3552 } 3553 3554 // If we are about to wrap up and go into termination, check if we 3555 // should raise the overflow flag. 3556 if (do_termination && !has_aborted()) { 3557 if (_cm->force_overflow()->should_force()) { 3558 _cm->set_has_overflown(); 3559 regular_clock_call(); 3560 } 3561 } 3562 3563 // We still haven't aborted. Now, let's try to get into the 3564 // termination protocol. 3565 if (do_termination && !has_aborted()) { 3566 // We cannot check whether the global stack is empty, since other 3567 // tasks might be concurrently pushing objects on it. 3568 // Separated the asserts so that we know which one fires. 3569 assert(_cm->out_of_regions(), "only way to reach here"); 3570 assert(_task_queue->size() == 0, "only way to reach here"); 3571 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 3572 3573 // The CMTask class also extends the TerminatorTerminator class, 3574 // hence its should_exit_termination() method will also decide 3575 // whether to exit the termination protocol or not. 3576 bool finished = (is_serial || 3577 _cm->terminator()->offer_termination(this)); 3578 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 3579 _termination_time_ms += 3580 termination_end_time_ms - _termination_start_time_ms; 3581 3582 if (finished) { 3583 // We're all done. 3584 3585 if (_worker_id == 0) { 3586 // let's allow task 0 to do this 3587 if (concurrent()) { 3588 assert(_cm->concurrent_marking_in_progress(), "invariant"); 3589 // we need to set this to false before the next 3590 // safepoint. This way we ensure that the marking phase 3591 // doesn't observe any more heap expansions. 3592 _cm->clear_concurrent_marking_in_progress(); 3593 } 3594 } 3595 3596 // We can now guarantee that the global stack is empty, since 3597 // all other tasks have finished. We separated the guarantees so 3598 // that, if a condition is false, we can immediately find out 3599 // which one. 3600 guarantee(_cm->out_of_regions(), "only way to reach here"); 3601 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 3602 guarantee(_task_queue->size() == 0, "only way to reach here"); 3603 guarantee(!_cm->has_overflown(), "only way to reach here"); 3604 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 3605 } else { 3606 // Apparently there's more work to do. Let's abort this task. It 3607 // will restart it and we can hopefully find more things to do. 3608 set_has_aborted(); 3609 } 3610 } 3611 3612 // Mainly for debugging purposes to make sure that a pointer to the 3613 // closure which was statically allocated in this frame doesn't 3614 // escape it by accident. 3615 set_cm_oop_closure(NULL); 3616 double end_time_ms = os::elapsedVTime() * 1000.0; 3617 double elapsed_time_ms = end_time_ms - _start_time_ms; 3618 // Update the step history. 3619 _step_times_ms.add(elapsed_time_ms); 3620 3621 if (has_aborted()) { 3622 // The task was aborted for some reason. 3623 if (_has_timed_out) { 3624 double diff_ms = elapsed_time_ms - _time_target_ms; 3625 // Keep statistics of how well we did with respect to hitting 3626 // our target only if we actually timed out (if we aborted for 3627 // other reasons, then the results might get skewed). 3628 _marking_step_diffs_ms.add(diff_ms); 3629 } 3630 3631 if (_cm->has_overflown()) { 3632 // This is the interesting one. We aborted because a global 3633 // overflow was raised. This means we have to restart the 3634 // marking phase and start iterating over regions. However, in 3635 // order to do this we have to make sure that all tasks stop 3636 // what they are doing and re-initialize in a safe manner. We 3637 // will achieve this with the use of two barrier sync points. 3638 3639 if (!is_serial) { 3640 // We only need to enter the sync barrier if being called 3641 // from a parallel context 3642 _cm->enter_first_sync_barrier(_worker_id); 3643 3644 // When we exit this sync barrier we know that all tasks have 3645 // stopped doing marking work. So, it's now safe to 3646 // re-initialize our data structures. At the end of this method, 3647 // task 0 will clear the global data structures. 3648 } 3649 3650 // We clear the local state of this task... 3651 clear_region_fields(); 3652 3653 if (!is_serial) { 3654 // ...and enter the second barrier. 3655 _cm->enter_second_sync_barrier(_worker_id); 3656 } 3657 // At this point, if we're during the concurrent phase of 3658 // marking, everything has been re-initialized and we're 3659 // ready to restart. 3660 } 3661 } 3662 3663 _claimed = false; 3664 } 3665 3666 CMTask::CMTask(uint worker_id, 3667 ConcurrentMark* cm, 3668 size_t* marked_bytes, 3669 BitMap* card_bm, 3670 CMTaskQueue* task_queue, 3671 CMTaskQueueSet* task_queues) 3672 : _g1h(G1CollectedHeap::heap()), 3673 _worker_id(worker_id), _cm(cm), 3674 _claimed(false), 3675 _nextMarkBitMap(NULL), _hash_seed(17), 3676 _task_queue(task_queue), 3677 _task_queues(task_queues), 3678 _cm_oop_closure(NULL), 3679 _marked_bytes_array(marked_bytes), 3680 _card_bm(card_bm) { 3681 guarantee(task_queue != NULL, "invariant"); 3682 guarantee(task_queues != NULL, "invariant"); 3683 3684 _marking_step_diffs_ms.add(0.5); 3685 } 3686 3687 // These are formatting macros that are used below to ensure 3688 // consistent formatting. The *_H_* versions are used to format the 3689 // header for a particular value and they should be kept consistent 3690 // with the corresponding macro. Also note that most of the macros add 3691 // the necessary white space (as a prefix) which makes them a bit 3692 // easier to compose. 3693 3694 // All the output lines are prefixed with this string to be able to 3695 // identify them easily in a large log file. 3696 #define G1PPRL_LINE_PREFIX "###" 3697 3698 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 3699 #ifdef _LP64 3700 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 3701 #else // _LP64 3702 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 3703 #endif // _LP64 3704 3705 // For per-region info 3706 #define G1PPRL_TYPE_FORMAT " %-4s" 3707 #define G1PPRL_TYPE_H_FORMAT " %4s" 3708 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 3709 #define G1PPRL_BYTE_H_FORMAT " %9s" 3710 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 3711 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 3712 3713 // For summary info 3714 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 3715 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 3716 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 3717 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 3718 3719 G1PrintRegionLivenessInfoClosure:: 3720 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 3721 : _out(out), 3722 _total_used_bytes(0), _total_capacity_bytes(0), 3723 _total_prev_live_bytes(0), _total_next_live_bytes(0), 3724 _hum_used_bytes(0), _hum_capacity_bytes(0), 3725 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 3726 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 3727 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 3728 MemRegion g1_reserved = g1h->g1_reserved(); 3729 double now = os::elapsedTime(); 3730 3731 // Print the header of the output. 3732 _out->cr(); 3733 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 3734 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP" 3735 G1PPRL_SUM_ADDR_FORMAT("reserved") 3736 G1PPRL_SUM_BYTE_FORMAT("region-size"), 3737 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 3738 HeapRegion::GrainBytes); 3739 _out->print_cr(G1PPRL_LINE_PREFIX); 3740 _out->print_cr(G1PPRL_LINE_PREFIX 3741 G1PPRL_TYPE_H_FORMAT 3742 G1PPRL_ADDR_BASE_H_FORMAT 3743 G1PPRL_BYTE_H_FORMAT 3744 G1PPRL_BYTE_H_FORMAT 3745 G1PPRL_BYTE_H_FORMAT 3746 G1PPRL_DOUBLE_H_FORMAT 3747 G1PPRL_BYTE_H_FORMAT 3748 G1PPRL_BYTE_H_FORMAT, 3749 "type", "address-range", 3750 "used", "prev-live", "next-live", "gc-eff", 3751 "remset", "code-roots"); 3752 _out->print_cr(G1PPRL_LINE_PREFIX 3753 G1PPRL_TYPE_H_FORMAT 3754 G1PPRL_ADDR_BASE_H_FORMAT 3755 G1PPRL_BYTE_H_FORMAT 3756 G1PPRL_BYTE_H_FORMAT 3757 G1PPRL_BYTE_H_FORMAT 3758 G1PPRL_DOUBLE_H_FORMAT 3759 G1PPRL_BYTE_H_FORMAT 3760 G1PPRL_BYTE_H_FORMAT, 3761 "", "", 3762 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 3763 "(bytes)", "(bytes)"); 3764 } 3765 3766 // It takes as a parameter a reference to one of the _hum_* fields, it 3767 // deduces the corresponding value for a region in a humongous region 3768 // series (either the region size, or what's left if the _hum_* field 3769 // is < the region size), and updates the _hum_* field accordingly. 3770 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) { 3771 size_t bytes = 0; 3772 // The > 0 check is to deal with the prev and next live bytes which 3773 // could be 0. 3774 if (*hum_bytes > 0) { 3775 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes); 3776 *hum_bytes -= bytes; 3777 } 3778 return bytes; 3779 } 3780 3781 // It deduces the values for a region in a humongous region series 3782 // from the _hum_* fields and updates those accordingly. It assumes 3783 // that that _hum_* fields have already been set up from the "starts 3784 // humongous" region and we visit the regions in address order. 3785 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 3786 size_t* capacity_bytes, 3787 size_t* prev_live_bytes, 3788 size_t* next_live_bytes) { 3789 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 3790 *used_bytes = get_hum_bytes(&_hum_used_bytes); 3791 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 3792 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 3793 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 3794 } 3795 3796 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 3797 const char* type = r->get_type_str(); 3798 HeapWord* bottom = r->bottom(); 3799 HeapWord* end = r->end(); 3800 size_t capacity_bytes = r->capacity(); 3801 size_t used_bytes = r->used(); 3802 size_t prev_live_bytes = r->live_bytes(); 3803 size_t next_live_bytes = r->next_live_bytes(); 3804 double gc_eff = r->gc_efficiency(); 3805 size_t remset_bytes = r->rem_set()->mem_size(); 3806 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 3807 3808 if (r->is_starts_humongous()) { 3809 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 3810 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 3811 "they should have been zeroed after the last time we used them"); 3812 // Set up the _hum_* fields. 3813 _hum_capacity_bytes = capacity_bytes; 3814 _hum_used_bytes = used_bytes; 3815 _hum_prev_live_bytes = prev_live_bytes; 3816 _hum_next_live_bytes = next_live_bytes; 3817 get_hum_bytes(&used_bytes, &capacity_bytes, 3818 &prev_live_bytes, &next_live_bytes); 3819 end = bottom + HeapRegion::GrainWords; 3820 } else if (r->is_continues_humongous()) { 3821 get_hum_bytes(&used_bytes, &capacity_bytes, 3822 &prev_live_bytes, &next_live_bytes); 3823 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 3824 } 3825 3826 _total_used_bytes += used_bytes; 3827 _total_capacity_bytes += capacity_bytes; 3828 _total_prev_live_bytes += prev_live_bytes; 3829 _total_next_live_bytes += next_live_bytes; 3830 _total_remset_bytes += remset_bytes; 3831 _total_strong_code_roots_bytes += strong_code_roots_bytes; 3832 3833 // Print a line for this particular region. 3834 _out->print_cr(G1PPRL_LINE_PREFIX 3835 G1PPRL_TYPE_FORMAT 3836 G1PPRL_ADDR_BASE_FORMAT 3837 G1PPRL_BYTE_FORMAT 3838 G1PPRL_BYTE_FORMAT 3839 G1PPRL_BYTE_FORMAT 3840 G1PPRL_DOUBLE_FORMAT 3841 G1PPRL_BYTE_FORMAT 3842 G1PPRL_BYTE_FORMAT, 3843 type, p2i(bottom), p2i(end), 3844 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 3845 remset_bytes, strong_code_roots_bytes); 3846 3847 return false; 3848 } 3849 3850 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 3851 // add static memory usages to remembered set sizes 3852 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3853 // Print the footer of the output. 3854 _out->print_cr(G1PPRL_LINE_PREFIX); 3855 _out->print_cr(G1PPRL_LINE_PREFIX 3856 " SUMMARY" 3857 G1PPRL_SUM_MB_FORMAT("capacity") 3858 G1PPRL_SUM_MB_PERC_FORMAT("used") 3859 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3860 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3861 G1PPRL_SUM_MB_FORMAT("remset") 3862 G1PPRL_SUM_MB_FORMAT("code-roots"), 3863 bytes_to_mb(_total_capacity_bytes), 3864 bytes_to_mb(_total_used_bytes), 3865 perc(_total_used_bytes, _total_capacity_bytes), 3866 bytes_to_mb(_total_prev_live_bytes), 3867 perc(_total_prev_live_bytes, _total_capacity_bytes), 3868 bytes_to_mb(_total_next_live_bytes), 3869 perc(_total_next_live_bytes, _total_capacity_bytes), 3870 bytes_to_mb(_total_remset_bytes), 3871 bytes_to_mb(_total_strong_code_roots_bytes)); 3872 _out->cr(); 3873 }