/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ErgoVerbose.hpp"
#include "gc/g1/g1Log.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize  = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
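  // Each bitmap bit covers (1 << _shifter) heap words, so object starts
  // can only appear at that granularity.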
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield; // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort the iteration if, after yielding, the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We do them
      // as asserts here to minimize their overhead in product builds. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in product builds.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer _hrclaimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
}

void CMBitMap::markRange(MemRegion mr) {
  // Clamp the range to the part of the heap this bitmap covers; note that
  // intersection() returns the result rather than modifying mr in place.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we have overflowed the marking stack while marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity; continue with the current stack.
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
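  // Claim the slots by bumping _index first, then fill them in; holding
  // ParGCRareEvent_lock makes the two steps atomic with respect to other
  // pushers and poppers.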
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
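  // Unless the scan was aborted, every survivor should have been handed
  // out by claim_next() before we get here.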
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set.
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal.
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
      (double) os::processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor          = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
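    // scale_parallel_threads() gives roughly one marking thread per four
    // parallel GC threads, but always at least one.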
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
  gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
  gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
  gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
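  // Point every per-worker task at the next marking bitmap, which is the
  // one the upcoming cycle will mark into.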
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
878 * 879 * Note, however, that this code is also used during remark and in 880 * this case we should not attempt to leave / enter the STS, otherwise 881 * we'll either hit an assert (debug / fastdebug) or deadlock 882 * (product). So we should only leave / enter the STS if we are 883 * operating concurrently. 884 * 885 * Because the thread that does the sync barrier has left the STS, it 886 * is possible to be suspended for a Full GC or an evacuation pause 887 * could occur. This is actually safe, since the entering the sync 888 * barrier is one of the last things do_marking_step() does, and it 889 * doesn't manipulate any data structures afterwards. 890 */ 891 892 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 893 bool barrier_aborted; 894 { 895 SuspendibleThreadSetLeaver sts_leave(concurrent()); 896 barrier_aborted = !_first_overflow_barrier_sync.enter(); 897 } 898 899 // at this point everyone should have synced up and not be doing any 900 // more work 901 902 if (barrier_aborted) { 903 // If the barrier aborted we ignore the overflow condition and 904 // just abort the whole marking phase as quickly as possible. 905 return; 906 } 907 908 // If we're executing the concurrent phase of marking, reset the marking 909 // state; otherwise the marking state is reset after reference processing, 910 // during the remark pause. 911 // If we reset here as a result of an overflow during the remark we will 912 // see assertion failures from any subsequent set_concurrency_and_phase() 913 // calls. 914 if (concurrent()) { 915 // let the task associated with with worker 0 do this 916 if (worker_id == 0) { 917 // task 0 is responsible for clearing the global data structures 918 // We should be here because of an overflow. During STW we should 919 // not clear the overflow flag since we rely on it being true when 920 // we exit this method to abort the pause and restart concurrent 921 // marking. 
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp();
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
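// With dynamic GC thread counts disabled (or an explicit ConcGCThreads
// setting), this simply returns the maximum; otherwise it defers to
// AdaptiveSizePolicy.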
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  double scan_start = os::elapsedTime();

  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
    }

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// Helper class to get rid of some boilerplate code.
class G1CMTraceTime : public StackObj {
  GCTraceTimeImpl _gc_trace_time;
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

 public:
  G1CMTraceTime(const char* title, bool doit)
    : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
  }
};

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
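    // Note that _restart_for_overflow was set above; the concurrent mark
    // thread checks this flag and re-runs the concurrent phase.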
    reset_marking_state();
  } else {
    {
      G1CMTraceTime trace("GC aggregate-data", G1Log::finer());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  void set_bit_for_region(HeapRegion* hr) {
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    _region_bm->par_at_put(index, true);
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
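    // The loop below walks the marked objects in [start, ntams), records
    // the cards they span, and accumulates their sizes into marked_bytes.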
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // This will happen if we are handling a humongous object that spans
      // several heap regions.
      if (obj_end > hr->end()) {
        break;
      }
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.
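// A non-zero failure count from this closure means the concurrently
// accumulated liveness data disagrees with the values recomputed here.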
class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm; // Region BM to be verified
  BitMap* _card_bm;   // Card BM to be verified

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    if (exp_marked_bytes > act_marked_bytes) {
      if (hr->is_starts_humongous()) {
        // For start_humongous regions, the size of the whole object will be
        // in exp_marked_bytes.
        HeapRegion* region = hr;
        int num_regions;
        for (num_regions = 0; region != NULL; num_regions++) {
          region = _g1h->next_region_in_humongous(region);
        }
        if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
          failures += 1;
        } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
          failures += 1;
        }
      } else {
        // We're not OK if expected marked bytes > actual marked bytes. It means
        // we have missed accounting some objects during the actual marking.
        failures += 1;
      }
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        failures += 1;
      }
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
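    // Instead we keep going so that _failures accumulates the total
    // number of violations across all regions.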
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int _failures;

  HeapRegionClaimer _hrclaimer;

 public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region
// containing live data in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
 public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             end_idx, _card_bm->size());
      assert(start_idx < _card_bm->size(),
             "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             start_idx, _card_bm->size());

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;
  HeapRegionClaimer _hrclaimer;

 public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
  }
};

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HeapRegionSetCount _old_regions_removed;
  HeapRegionSetCount _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

 public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(),
    _humongous_regions_removed(),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
  const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed.increment(1u, hr->capacity());
        _g1->free_humongous_region(hr, _local_cleanup_list, true);
      } else {
        _old_regions_removed.increment(1u, hr->capacity());
        _g1->free_region(hr, _local_cleanup_list, true);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
      AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we cannot guarantee that we only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
      AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
  }

};

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
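  // (has_aborted() is set by ConcurrentMark::abort(), which a Full GC
  // uses to abandon the in-progress cycle; see abort() further below.)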
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Cleanup Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);

  g1h->workers()->run_task(&g1_par_count_task);

  if (VerifyDuringGC) {
    // Verify that the counting data accumulated during marking matches
    // that calculated by walking the marking bitmap.

    // Bitmaps to hold expected values
    BitMap expected_region_bm(_region_bm.size(), true);
    BitMap expected_card_bm(_card_bm.size(), true);

    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                 &_region_bm,
                                                 &_card_bm,
                                                 &expected_region_bm,
                                                 &expected_card_bm);

    g1h->workers()->run_task(&g1_par_verify_task);

    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  }

  size_t start_used_bytes = g1h->used();
  g1h->collector_state()->set_mark_in_progress(false);

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitmap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  uint n_workers = _g1h->workers()->active_workers();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
  g1h->workers()->run_task(&g1_par_note_end_task);
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // Scrubbing the remembered sets must happen before the
  // record_concurrent_mark_cleanup_end() call below, since it affects
  // the metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
    g1h->workers()->run_task(&g1_par_scrub_rs_task);

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end();

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  if (G1Log::fine()) {
    g1h->g1_policy()->print_heap_transition(start_used_bytes);
  }

  // Cleanup will have freed any regions completely full of garbage.
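  // (The freed space changes the heap occupancy figure that the soft
  // reference clearing policy bases its decisions on.)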
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(after)");
  }

  g1h->check_bitmaps("Cleanup End");

  g1h->verify_region_sets_optional();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();
  g1h->allocation_context_stats().update_after_mark();

  g1h->trace_heap_after_concurrent_cycle();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has %u entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it is not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending %u entries to the secondary_free_list, "
                               "cleanup list still has %u entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }
#ifndef PRODUCT
      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
#endif
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the CMTask associated with a worker thread (for serial reference
// processing the CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also, using the tasks' local queues removes the potential
// for the workers to interfere with each other, which could occur if
// they operated on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call CMTask::do_marking_step() to
        // process these entries.
        //
        // We call CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false      /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' closure used by both serial and parallel reference processing.
// Uses the CMTask associated with a given worker thread (for serial
// reference processing the CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.
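//
// Together, the 'keep alive' and 'drain' closures form the
// (keep_alive, complete_gc) pair that
// ReferenceProcessor::process_discovered_references() expects: the
// former marks and traces each live referent, the latter flushes the
// entries that tracing pushed onto the marking data structures.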

class G1CMDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true         /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  WorkGang*        _workers;
  uint             _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          WorkGang* workers,
                          uint n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
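  // (ProcessTask works through the discovered reference lists;
  // EnqueueTask adds the surviving references to the pending list.)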
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&     _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    ResourceMark rm;
    HandleMark hm;
    CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in CMTask::do_marking_step() know
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&proc_task_proxy);
}

class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in CMTask::do_marking_step() know
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&enq_task_proxy);
}

void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
  G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
}

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  if (has_overflown()) {
    // Skip processing the discovered references if we have
    // overflown the global marking stack. Reference objects
    // only get discovered once so it is OK to not
    // de-populate the discovered reference lists. We could have,
    // but the only benefit would be that, when marking restarts,
    // fewer reference objects are discovered.
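    // (The discovered lists are left intact and will be processed
    // when marking is retried after the restart.)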
    return;
  }

  ResourceMark rm;
  HandleMark   hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    G1CMTraceTime t("GC ref-proc", G1Log::finer());

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Set the soft reference policy
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    // Instances of the 'Keep Alive' and 'Complete GC' closures used
    // in serial reference processing. Note these closures are also
    // used for serially processing (by the current thread) the JNI
    // references during parallel reference processing.
    //
    // These closures do not need to synchronize with the worker
    // threads involved in parallel reference processing as these
    // instances are executed serially by the current thread (i.e.
    // reference processing is not multi-threaded and is thus
    // performed by the current thread instead of a gang worker).
    //
    // The gang tasks involved in parallel reference processing create
    // their own instances of these closures, which do their own
    // synchronization among themselves.
    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);

    // We need at least one active thread. If reference processing
    // is not multi-threaded we use the current (VMThread) thread,
    // otherwise we use the work gang from the G1CollectedHeap and
    // we utilize all the worker threads we can.
    bool processing_is_mt = rp->processing_is_mt();
    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);

    // Parallel processing task executor.
    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);
    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);

    // Set the concurrency level. The phase was already set prior to
    // executing the remark task.
    set_concurrency(active_workers);

    // Set the degree of MT processing here. If the discovery was done MT,
    // the number of threads involved during discovery could differ from
    // the number of active workers. This is OK as long as the discovered
    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);

    // Process the weak references.
    const ReferenceProcessorStats& stats =
        rp->process_discovered_references(&g1_is_alive,
                                          &g1_keep_alive,
                                          &g1_drain_mark_stack,
                                          executor,
                                          g1h->gc_timer_cm());
    g1h->gc_tracer_cm()->report_gc_reference_stats(stats);

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.
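    // That case is handled just below: the overflow flag is (re)set
    // and weakRefsWork() returns early once we leave this scope.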

    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");

    if (_markStack.overflow()) {
      // This should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    assert(rp->num_q() == active_workers, "why not");

    rp->enqueue_discovered_references(executor);

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  if (has_overflown()) {
    // We cannot trust g1_is_alive if the marking stack overflowed
    return;
  }

  assert(_markStack.isEmpty(), "Marking should have completed");

  // Unload Klasses, String, Symbols, Code Cache, etc.
  {
    G1CMTraceTime trace("Unloading", G1Log::finer());

    if (ClassUnloadingWithConcurrentMark) {
      bool purged_classes;

      {
        G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
        purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
      }

      {
        G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
        weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
      }
    }

    if (G1StringDedup::is_enabled()) {
      G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
      G1StringDedup::unlink(&g1_is_alive);
    }
  }
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

// Closure for marking entries in SATB buffers.
class CMSATBBufferClosure : public SATBBufferClosure {
private:
  CMTask* _task;
  G1CollectedHeap* _g1h;

  // This is very similar to CMTask::deal_with_reference, but with
  // more relaxed requirements for the argument, so this must be more
  // circumspect about treating the argument as an object.
  void do_entry(void* entry) const {
    _task->increment_refs_reached();
    HeapRegion* hr = _g1h->heap_region_containing(entry);
    if (entry < hr->next_top_at_mark_start()) {
      // Until we get here, we don't know whether entry refers to a valid
      // object; it could instead have been a stale reference.
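      // An entry below NTAMS, however, must refer to an object that
      // already existed when marking started, so it is now safe to
      // treat it as an oop (the assert below double-checks this).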
      oop obj = static_cast<oop>(entry);
      assert(obj->is_oop(true /* ignore mark word */),
             "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
      _task->make_reference_grey(obj, hr);
    }
  }

public:
  CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};

class G1RemarkThreadsClosure : public ThreadClosure {
  CMSATBBufferClosure _cm_satb_cl;
  G1CMOopClosure _cm_cl;
  MarkingCodeBlobClosure _code_cl;
  int _thread_parity;

public:
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
    _cm_satb_cl(task, g1h),
    _cm_cl(g1h, g1h->concurrent_mark(), task),
    _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
    _thread_parity(Threads::thread_claim_parity()) {}

  void do_thread(Thread* thread) {
    if (thread->is_Java_thread()) {
      if (thread->claim_oops_do(true, _thread_parity)) {
        JavaThread* jt = (JavaThread*)thread;

        // In theory it should not be necessary to explicitly walk the nmethods to find roots
        // for concurrent marking; however, the oops reachable from nmethods have very complex
        // lifecycles:
        // * Alive if on the stack of an executing method
        // * Weakly reachable otherwise
        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of
        // the receiver, should be live by the SATB invariant, but other oops recorded in
        // nmethods may behave differently.
        jt->nmethods_do(&_code_cl);

        jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
      }
    } else if (thread->is_VM_thread()) {
      if (thread->claim_oops_do(true, _thread_parity)) {
        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
      }
    }
  }
};

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if (worker_id < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      {
        ResourceMark rm;
        HandleMark hm;

        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
        Threads::threads_do(&threads_f);
      }

      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true         /* do_termination       */,
                              false        /* is_serial            */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
    AbstractGangTask("Par Remark"), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  G1CMTraceTime trace("Finalize Marking", G1Log::finer());

  g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at its
  // value originally calculated in the ConcurrentMark
  // constructor and pass values of the active workers
  // through the gang in the task.

  {
    StrongRootsScope srs(active_workers);

    CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    g1h->workers()->run_task(&remarkTask);
  }

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %d",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}

void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
}

void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
  _nextMarkBitMap->clearRange(mr);
}

HeapRegion*
ConcurrentMark::claim_region(uint worker_id) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    HeapRegion* curr_region = _g1h->heap_region_containing(finger);

    // heap_region_containing() above may return NULL as we always claim
    // regions up to the end of the heap. In this case, just jump to the
    // next region.
    HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger && curr_region != NULL) {
      // we succeeded
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit  = curr_region->next_top_at_mark_start();

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (limit > bottom) {
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

#ifndef PRODUCT
class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
private:
  G1CollectedHeap* _g1h;
  const char* _phase;
  int _info;

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(oop obj) const {
    guarantee(obj->is_oop(),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(obj), _phase, _info);
    guarantee(!_g1h->obj_in_cs(obj),
              "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
              p2i(obj), _phase, _info);
  }
};

void ConcurrentMark::verify_no_cset_oops() {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
    return;
  }

  // Verify entries on the global mark stack
  _markStack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != NULL && global_finger < _heap_end) {
    // Since we always iterate over all regions, we might get a NULL HeapRegion
    // here.
    HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
    guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(parallel_marking_threads() <= _max_worker_id, "sanity");
  for (uint i = 0; i < parallel_marking_threads(); ++i) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _heap_end) {
      // See above note on the global finger verification.
      HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
      guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
                !task_hr->in_collection_set(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
    }
  }
}
#endif // PRODUCT

// Aggregate the counting data that was constructed concurrently
// with marking.
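//
// During marking each worker accumulates liveness data into its own
// card bitmap (count_card_bitmap_for()) and per-region marked-bytes
// array (count_marked_bytes_array_for()). This closure folds those
// per-worker structures into the global card bitmap and into each
// region's marked-bytes total.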
class AggregateCountDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;
  BitMap* _cm_card_bm;
  uint _max_worker_id;

public:
  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
                              BitMap* cm_card_bm,
                              uint max_worker_id) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* start = hr->bottom();
    HeapWord* limit = hr->next_top_at_mark_start();
    HeapWord* end = hr->end();

    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
           "top: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));

    assert(hr->next_marked_bytes() == 0, "Precondition");

    if (start == limit) {
      // NTAMS of this region has not been set so nothing to do.
      return false;
    }

    // 'start' should be in the heap.
    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump the card bitmap index
    // for limit so that we get all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could actually be just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
    uint hrm_index = hr->hrm_index();
    size_t marked_bytes = 0;

    for (uint i = 0; i < _max_worker_id; i += 1) {
      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

      // Fetch the marked_bytes in this region for task i and
      // add it to the running total for this region.
      marked_bytes += marked_bytes_array[hrm_index];

      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
      // into the global card bitmap.
      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);

      while (scan_idx < limit_idx) {
        assert(task_card_bm->at(scan_idx) == true, "should be");
        _cm_card_bm->set_bit(scan_idx);
        assert(_cm_card_bm->at(scan_idx) == true, "should be");

        // BitMap::get_next_one_offset() can handle the case when
        // its left_offset parameter is greater than its right_offset
        // parameter. It does, however, have an early exit if
        // left_offset == right_offset. So let's limit the value
        // passed in for left offset here.
        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
      }
    }

    // Update the marked bytes for this region.
    hr->add_to_marked_bytes(marked_bytes);

    // Next heap region
    return false;
  }
};

class G1AggregateCountDataTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _cm_card_bm;
  uint _max_worker_id;
  uint _active_workers;
  HeapRegionClaimer _hrclaimer;

public:
  G1AggregateCountDataTask(G1CollectedHeap* g1h,
                           ConcurrentMark* cm,
                           BitMap* cm_card_bm,
                           uint max_worker_id,
                           uint n_workers) :
      AbstractGangTask("Count Aggregation"),
      _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
      _max_worker_id(max_worker_id),
      _active_workers(n_workers),
      _hrclaimer(_active_workers) {
  }

  void work(uint worker_id) {
    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);

    _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
  }
};


void ConcurrentMark::aggregate_count_data() {
  uint n_workers = _g1h->workers()->active_workers();

  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
                                           _max_worker_id, n_workers);

  _g1h->workers()->run_task(&g1_par_agg_task);
}

// Clear the per-worker arrays used to store the per-region counting data
void ConcurrentMark::clear_all_count_data() {
  // Clear the global card bitmap - it will be filled during
  // liveness count aggregation (during remark) and the
  // final counting task.
  _card_bm.clear();

  // Clear the global region bitmap - it will be filled as part
  // of the final counting task.
  _region_bm.clear();

  uint max_regions = _g1h->max_regions();
  assert(_max_worker_id > 0, "uninitialized");

  for (uint i = 0; i < _max_worker_id; i += 1) {
    BitMap* task_card_bm = count_card_bitmap_for(i);
    size_t* marked_bytes_array = count_marked_bytes_array_for(i);

    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
    assert(marked_bytes_array != NULL, "uninitialized");

    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
    task_card_bm->clear();
  }
}

void ConcurrentMark::print_stats() {
  if (G1MarkingVerboseLevel > 0) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

// Abandon the current marking iteration due to a Full GC.
void ConcurrentMark::abort() {
  if (!cmThread()->during_cycle() || _has_aborted) {
    // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
    return;
  }

  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
  // concurrent bitmap clearing.
  _nextMarkBitMap->clearAll();

  // Note we cannot clear the previous marking bitmap here
  // since VerifyDuringGC verifies the objects marked during
  // a full GC against the previous bitmap.

  // Clear the liveness counting data
  clear_all_count_data();
  // Empty mark stack
  reset_marking_state();
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, so we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);

  _g1h->trace_heap_after_concurrent_cycle();
  _g1h->register_concurrent_cycle_end();
}

static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                           : 0.0));
  }
  gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
                         "(%8.2f s marking).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum());
}

void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}

void ConcurrentMark::print_on_error(outputStream* st) const {
  st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
               p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
  _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
  _nextMarkBitMap->print_on_error(st, " Next Bits: ");
}

// We take a break if someone is trying to stop the world.
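// (Marking threads participate in the SuspendibleThreadSet protocol:
// should_yield() becomes true when a safepoint has been requested,
// and yield() blocks the caller until the safepoint operation has
// completed.)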
bool ConcurrentMark::do_yield_check(uint worker_id) {
  if (SuspendibleThreadSet::should_yield()) {
    if (worker_id == 0) {
      _g1h->g1_policy()->record_concurrent_pause();
    }
    SuspendibleThreadSet::yield();
    return true;
  } else {
    return false;
  }
}

// Closure for iteration over bitmaps
class CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  CMBitMap*       _nextMarkBitMap;
  ConcurrentMark* _cm;
  CMTask*         _task;

public:
  CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
    _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert( addr < _cm->finger(), "invariant");
    assert(addr >= _task->finger(), "invariant");

    // We move this task's local finger along.
    _task->move_finger_to(addr);

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
  ReferenceProcessor* result = NULL;
  if (G1UseConcMarkReferenceProcessing) {
    result = g1h->ref_processor_cm();
    assert(result != NULL, "should not be NULL");
  }
  return result;
}

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _cm(cm), _task(task)
{ }

void CMTask::setup_for_region(HeapRegion* hr) {
  assert(hr != NULL,
         "claim_region() should have filtered out NULL regions");
  _curr_region = hr;
  _finger      = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and in fact we do not need to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region  = NULL;
  _finger       = NULL;
  _region_limit = NULL;
}

void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");
  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();

  _calls                     = 0;
  _elapsed_time_ms           = 0.0;
  _termination_time_ms       = 0.0;
  _termination_start_time_ms = 0.0;
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following:

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    return;
  }

  // (4) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    return;
  }

  // (5) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // We do need to process SATB buffers, so we'll abort and restart
    // the marking task to do so.
    set_has_aborted();
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per-byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    if (!_cm->mark_stack_push(buffer, n)) {
      set_has_aborted();
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
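  // For example, with the default GCDrainStackTargetSize of 64, a
  // partial drain stops once the local queue is down to at most 64
  // entries, leaving some work available for other tasks to steal.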
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks, or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }
  }
}

// The SATB queue code makes several assumptions about whether to call the
// par or non-par versions of its methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    regular_clock_call();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
                         _worker_id, _calls);
  gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());
}

bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
  return _task_queues->steal(worker_id, hash_seed, obj);
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

    (1) Marking Bitmap. If there are gray objects that appear only
    on the bitmap (this happens either when dealing with an overflow
    or when the initial marking phase has simply marked the roots
    and didn't push them on the stack), then tasks claim heap
    regions whose bitmap they then scan to find gray objects. A
    global finger indicates where the end of the last claimed region
    is. A local finger indicates how far into the region a task has
    scanned. The two fingers are used to determine how to gray an
    object (i.e. whether simply marking it is OK, as it will be
    visited by a task in the future, or whether it needs to be also
    pushed on a stack).

    (2) Local Queue. The local queue of the task which is accessed
    reasonably efficiently by the task. Other tasks can steal from
    it when they run out of work. Throughout the marking phase, a
    task attempts to keep its local queue short but not totally
    empty, so that entries are available for stealing by other
    tasks. Only when there is no more work, a task will totally
    drain its local queue.

    (3) Global Mark Stack. This handles local queue overflow. During
    marking only sets of entries are moved between it and the local
    queues, as access to it requires a mutex and more fine-grained
    interaction with it which might cause contention. If it
    overflows, then the marking phase should restart and iterate
    over the bitmap to identify gray objects.
    Throughout the marking
    phase, tasks attempt to keep the global mark stack at a small
    length but not totally empty, so that entries are available for
    popping by other tasks. Only when there is no more work do tasks
    totally drain the global mark stack.

    (4) SATB Buffer Queue. This is where completed SATB buffers are
    made available. Buffers are regularly removed from this queue
    and scanned for roots, so that the queue doesn't get too
    long. During remark, all completed buffers are processed, as
    well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

    (1) When the marking phase has been aborted (after a Full GC).

    (2) When a global overflow (on the global stack) has been
    triggered. Before the task aborts, it will actually sync up with
    the other tasks to ensure that all the marking data structures
    (local queues, stacks, fingers etc.) are re-initialized so that
    when do_marking_step() completes, the marking phase can
    immediately restart.

    (3) When enough completed SATB buffers are available. The
    do_marking_step() method only tries to drain SATB buffers right
    at the beginning. So, if enough buffers are available, the
    marking step aborts and the SATB buffers are processed at
    the beginning of the next invocation.

    (4) To yield. When we have to yield, we abort and do the yield
    right at the end of do_marking_step(). This saves us a lot of
    hassle, as yielding might allow a Full GC; if that happens,
    objects will be compacted underneath our feet, the heap might
    shrink, etc. We avoid having to check for all this by simply
    aborting and yielding right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions that were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: it is called when the number of object
    words the marking phase has scanned or the number of references
    the marking phase has visited reaches a given limit. Additional
    invocations of the clock method have been planted in a few other
    strategic places too. The initial reason for the clock method was
    to avoid calling vtime too regularly, as it is quite expensive.
    So, once it was in place, it was natural to piggy-back all the
    other conditions on it too and not constantly check them
    throughout the code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.
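
    For illustration, a parallel caller is expected to look roughly
    like the sketch below. This is a simplified, hypothetical driver
    (the real ones are the concurrent marking and remark tasks
    elsewhere in the code); "task" stands for a CMTask* and "cm" for
    the ConcurrentMark instance, and the step duration is assumed to
    come from G1ConcMarkStepDurationMillis:

      do {
        task->do_marking_step(G1ConcMarkStepDurationMillis,
                              true  /* do_termination */,
                              false /* is_serial */);
        // ... yield to a pending safepoint, update statistics ...
      } while (!cm->has_aborted() && task->has_aborted());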

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // Set up the variables that are used in the work-based scheme to
  // call the regular clock method.
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // Clear all flags.
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack.
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around the loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure.
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object.
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.
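    // (For intuition: the mark bitmap has one bit per possible object
    // start, i.e. one bit per minimum-object-alignment's worth of
    // heap, which is why nextObject() above only needs to bump the
    // finger by that granularity to reach the next possible object
    // header.)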

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method about why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one.
        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");
    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt to steal work from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");
    while (!has_aborted()) {
      oop obj;
      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
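  // (Note on the protocol, for orientation: offer_termination(this)
  // below blocks until either every task has offered termination, in
  // which case it returns true, or this task's
  // should_exit_termination() indicates that new work has appeared,
  // in which case it returns false and the task aborts so that the
  // marking step can be retried.)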
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // Let's allow task 0 to do this.
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // We need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task.
      // The caller will restart it and we can hopefully find more
      // things to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.
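      //
      // (A rough timeline of the restart: every parallel task enters
      //  the first barrier, which guarantees that no task is still
      //  doing marking work; each task then resets its local state;
      //  and the second barrier ensures that every task has finished
      //  re-initializing before any of them proceeds.)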

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context.
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
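  // (Purely illustrative shape of what follows; the columns and
  //  widths are governed by the G1PPRL_* macros above, and the values
  //  here are made up:
  //    ### PHASE Post-Marking @ 1.234
  //    ### HEAP  reserved: 0x...-0x...  region-size: 1048576
  //    ###  type  address-range  used  prev-live  next-live  gc-eff  remset  code-roots
  //  )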
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff",
                 "remset", "code-roots");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                 "(bytes)", "(bytes)");
}

// Takes a pointer to one of the _hum_* fields, deduces the
// corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field is
// < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// Deduces the values for a region in a humongous region series from
// the _hum_* fields and updates those accordingly. It assumes that
// the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
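    // (Worked example, assuming 1 MB regions: a 2.5 MB humongous
    //  object occupies three regions. The "starts humongous" region
    //  reports the series-wide values, which are stashed in the
    //  _hum_* fields here; this call and the two "continues
    //  humongous" calls that follow then peel off at most GrainBytes
    //  each via get_hum_bytes(), so the three printed rows sum back
    //  to the object's totals.)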
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, p2i(bottom), p2i(end),
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                 remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usages to remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset")
                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes),
                 bytes_to_mb(_total_strong_code_roots_bytes));
  _out->cr();
}
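
// A minimal usage sketch (illustrative only; this is roughly how the
// closure is typically driven, e.g. guarded by the
// -XX:+G1PrintRegionLivenessInfo flag):
//
//   G1CollectedHeap* g1h = G1CollectedHeap::heap();
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   g1h->heap_region_iterate(&cl);
//   // The destructor prints the SUMMARY footer when cl goes out of scope.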