/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ErgoVerbose.hpp"
#include "gc/g1/g1Log.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize  = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
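  // The bitmap keeps one bit per (1 << _shifter) heap words, so only
  // addresses at that granularity correspond to whole bits; rounding up
  // keeps heapWordToOffset() below from mapping addr into the middle of
  // an existing mark bit.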
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer     _hrclaimer;
  bool                  _suspendible; // If the task is suspendible, workers must join the STS.

 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _suspendible(suspendible), _hrclaimer(n_workers) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
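  // Claim the slots [start, next_index) before filling them; the
  // ParGCRareEvent_lock also serializes par_pop_arr(), so no concurrent
  // popper can observe the slots before they are written below.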
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so there is nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
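  // If the scan was aborted, _next_survivor may still point at an
  // unclaimed region; it is dropped unconditionally below.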
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
      (double) os::processor_count();
    double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor             = sleep_factor;
    _marking_task_overhead    = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
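    // scale_parallel_threads() computes MAX2((n + 2) / 4, 1U), so e.g.
    // ParallelGCThreads=8 yields 2 concurrent marking threads.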
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
                   (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
  gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
  gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
  gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

  _parallel_workers = new WorkGang("G1 Marker",
       _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
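    // Only values set on the command line are validated here; the
    // ergonomically computed default was handled in the branch above.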
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                       CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp();
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
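// The result is bounded above by max_parallel_marking_threads() and is
// always at least 1.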
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  double scan_start = os::elapsedTime();

  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
    }

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// Helper class to get rid of some boilerplate code.
class G1CMTraceTime : public StackObj {
  GCTraceTimeImpl _gc_trace_time;
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

 public:
  G1CMTraceTime(const char* title, bool doit)
    : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
  }
};

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops.  We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      G1CMTraceTime trace("GC aggregate-data", G1Log::finer());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  void set_bit_for_region(HeapRegion* hr) {
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    _region_bm->par_at_put(index, true);
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // This will happen if we are handling a humongous object that spans
      // several heap regions.
      if (obj_end > hr->end()) {
        break;
      }
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.
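// For each region it recomputes the expected liveness data from the next
// marking bitmap (via CalcLiveObjectsClosure) and compares the result with
// the actual region and card bitmaps, counting any discrepancy as a failure.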

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    // For start_humongous regions, the size of the whole object will be
    // in exp_marked_bytes, so this check does not apply in this case.
    if (exp_marked_bytes > act_marked_bytes && !hr->is_starts_humongous()) {
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        failures += 1;
      }
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
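    // Instead we visit every region so that the total number of failures
    // is accumulated and reported.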
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;

  HeapRegionClaimer _hrclaimer;

 public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
 public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             end_idx, _card_bm->size());
      assert(start_idx < _card_bm->size(),
             "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             start_idx, _card_bm->size());

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;
  HeapRegionClaimer _hrclaimer;

 public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
  }
};

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HeapRegionSetCount _old_regions_removed;
  HeapRegionSetCount _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

 public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(),
    _humongous_regions_removed(),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
  const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
1643 _g1->reset_gc_time_stamps(hr); 1644 hr->note_end_of_marking(); 1645 1646 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1647 _freed_bytes += hr->used(); 1648 hr->set_containing_set(NULL); 1649 if (hr->is_humongous()) { 1650 _humongous_regions_removed.increment(1u, hr->capacity()); 1651 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1652 } else { 1653 _old_regions_removed.increment(1u, hr->capacity()); 1654 _g1->free_region(hr, _local_cleanup_list, true); 1655 } 1656 } else { 1657 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1658 } 1659 1660 return false; 1661 } 1662 }; 1663 1664 class G1ParNoteEndTask: public AbstractGangTask { 1665 friend class G1NoteEndOfConcMarkClosure; 1666 1667 protected: 1668 G1CollectedHeap* _g1h; 1669 FreeRegionList* _cleanup_list; 1670 HeapRegionClaimer _hrclaimer; 1671 1672 public: 1673 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1674 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1675 } 1676 1677 void work(uint worker_id) { 1678 FreeRegionList local_cleanup_list("Local Cleanup List"); 1679 HRRSCleanupTask hrrs_cleanup_task; 1680 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1681 &hrrs_cleanup_task); 1682 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1683 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1684 1685 // Now update the lists 1686 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1687 { 1688 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1689 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1690 1691 // If we iterate over the global cleanup list at the end of 1692 // cleanup to do this printing we will not guarantee to only 1693 // generate output for the newly-reclaimed regions (the list 1694 // might not be empty at the beginning of cleanup; we might 1695 // still be working on its previous contents). So we do the 1696 // printing here, before we append the new regions to the global 1697 // cleanup list. 1698 1699 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1700 if (hr_printer->is_active()) { 1701 FreeRegionListIterator iter(&local_cleanup_list); 1702 while (iter.more_available()) { 1703 HeapRegion* hr = iter.get_next(); 1704 hr_printer->cleanup(hr); 1705 } 1706 } 1707 1708 _cleanup_list->add_ordered(&local_cleanup_list); 1709 assert(local_cleanup_list.is_empty(), "post-condition"); 1710 1711 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1712 } 1713 } 1714 }; 1715 1716 class G1ParScrubRemSetTask: public AbstractGangTask { 1717 protected: 1718 G1RemSet* _g1rs; 1719 BitMap* _region_bm; 1720 BitMap* _card_bm; 1721 HeapRegionClaimer _hrclaimer; 1722 1723 public: 1724 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1725 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1726 } 1727 1728 void work(uint worker_id) { 1729 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1730 } 1731 1732 }; 1733 1734 void ConcurrentMark::cleanup() { 1735 // world is stopped at this checkpoint 1736 assert(SafepointSynchronize::is_at_safepoint(), 1737 "world should be stopped"); 1738 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1739 1740 // If a full collection has happened, we shouldn't do this. 
1741 if (has_aborted()) {
1742 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1743 return;
1744 }
1745
1746 g1h->verify_region_sets_optional();
1747
1748 if (VerifyDuringGC) {
1749 HandleMark hm; // handle scope
1750 g1h->prepare_for_verify();
1751 Universe::verify(VerifyOption_G1UsePrevMarking,
1752 " VerifyDuringGC:(before)");
1753 }
1754 g1h->check_bitmaps("Cleanup Start");
1755
1756 G1CollectorPolicy* g1p = g1h->g1_policy();
1757 g1p->record_concurrent_mark_cleanup_start();
1758
1759 double start = os::elapsedTime();
1760
1761 HeapRegionRemSet::reset_for_cleanup_tasks();
1762
1763 // Do counting once more with the world stopped for good measure.
1764 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1765
1766 g1h->workers()->run_task(&g1_par_count_task);
1767
1768 if (VerifyDuringGC) {
1769 // Verify that the counting data accumulated during marking matches
1770 // that calculated by walking the marking bitmap.
1771
1772 // Bitmaps to hold expected values
1773 BitMap expected_region_bm(_region_bm.size(), true);
1774 BitMap expected_card_bm(_card_bm.size(), true);
1775
1776 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1777 &_region_bm,
1778 &_card_bm,
1779 &expected_region_bm,
1780 &expected_card_bm);
1781
1782 g1h->workers()->run_task(&g1_par_verify_task);
1783
1784 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1785 }
1786
1787 size_t start_used_bytes = g1h->used();
1788 g1h->collector_state()->set_mark_in_progress(false);
1789
1790 double count_end = os::elapsedTime();
1791 double this_final_counting_time = (count_end - start);
1792 _total_counting_time += this_final_counting_time;
1793
1794 if (G1PrintRegionLivenessInfo) {
1795 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1796 _g1h->heap_region_iterate(&cl);
1797 }
1798
1799 // Install newly created mark bitmap as "prev".
1800 swapMarkBitMaps();
1801
1802 g1h->reset_gc_time_stamp();
1803
1804 uint n_workers = _g1h->workers()->active_workers();
1805
1806 // Note end of marking in all heap regions.
1807 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1808 g1h->workers()->run_task(&g1_par_note_end_task);
1809 g1h->check_gc_time_stamps();
1810
1811 if (!cleanup_list_is_empty()) {
1812 // The cleanup list is not empty, so we'll have to process it
1813 // concurrently. Notify anyone else that might be wanting free
1814 // regions that there will be more free regions coming soon.
1815 g1h->set_free_regions_coming();
1816 }
1817
1818 // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1819 // call below, since it affects the metric by which we sort the heap regions.
1820 if (G1ScrubRemSets) {
1821 double rs_scrub_start = os::elapsedTime();
1822 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1823 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1824
1825 double rs_scrub_end = os::elapsedTime();
1826 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1827 _total_rs_scrub_time += this_rs_scrub_time;
1828 }
1829
1830 // this will also free any regions totally full of garbage objects,
1831 // and sort the regions.
1832 g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1833
1834 // Statistics.
1835 double end = os::elapsedTime();
1836 _cleanup_times.add((end - start) * 1000.0);
1837
1838 if (G1Log::fine()) {
1839 g1h->g1_policy()->print_heap_transition(start_used_bytes);
1840 }
1841
1842 // Clean up will have freed any regions completely full of garbage.
1843 // Update the soft reference policy with the new heap occupancy.
1844 Universe::update_heap_info_at_gc();
1845
1846 if (VerifyDuringGC) {
1847 HandleMark hm; // handle scope
1848 g1h->prepare_for_verify();
1849 Universe::verify(VerifyOption_G1UsePrevMarking,
1850 " VerifyDuringGC:(after)");
1851 }
1852
1853 g1h->check_bitmaps("Cleanup End");
1854
1855 g1h->verify_region_sets_optional();
1856
1857 // We need to make this be a "collection" so any collection pause that
1858 // races with it goes around and waits for completeCleanup to finish.
1859 g1h->increment_total_collections();
1860
1861 // Clean out dead classes and update Metaspace sizes.
1862 if (ClassUnloadingWithConcurrentMark) {
1863 ClassLoaderDataGraph::purge();
1864 }
1865 MetaspaceGC::compute_new_size();
1866
1867 // We reclaimed old regions so we should calculate the sizes to make
1868 // sure we update the old gen/space data.
1869 g1h->g1mm()->update_sizes();
1870 g1h->allocation_context_stats().update_after_mark();
1871
1872 g1h->trace_heap_after_concurrent_cycle();
1873 }
1874
1875 void ConcurrentMark::completeCleanup() {
1876 if (has_aborted()) return;
1877
1878 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1879
1880 _cleanup_list.verify_optional();
1881 FreeRegionList tmp_free_list("Tmp Free List");
1882
1883 if (G1ConcRegionFreeingVerbose) {
1884 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1885 "cleanup list has %u entries",
1886 _cleanup_list.length());
1887 }
1888
1889 // No one else should be accessing the _cleanup_list at this point,
1890 // so it is not necessary to take any locks
1891 while (!_cleanup_list.is_empty()) {
1892 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1893 assert(hr != NULL, "Got NULL from a non-empty list");
1894 hr->par_clear();
1895 tmp_free_list.add_ordered(hr);
1896
1897 // Instead of adding one region at a time to the secondary_free_list,
1898 // we accumulate them in the local list and move them a few at a
1899 // time. This also cuts down on the number of notify_all() calls
1900 // we do during this process. We'll also append the local list when
1901 // _cleanup_list is empty (which means we just removed the last
1902 // region from the _cleanup_list).
1903 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1904 _cleanup_list.is_empty()) {
1905 if (G1ConcRegionFreeingVerbose) {
1906 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1907 "appending %u entries to the secondary_free_list, "
1908 "cleanup list still has %u entries",
1909 tmp_free_list.length(),
1910 _cleanup_list.length());
1911 }
1912
1913 {
1914 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1915 g1h->secondary_free_list_add(&tmp_free_list);
1916 SecondaryFreeList_lock->notify_all();
1917 }
1918 #ifndef PRODUCT
1919 if (G1StressConcRegionFreeing) {
1920 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1921 os::sleep(Thread::current(), (jlong) 1, false);
1922 }
1923 }
1924 #endif
1925 }
1926 }
1927 assert(tmp_free_list.is_empty(), "post-condition");
1928 }
1929
1930 // Supporting Object and Oop closures for reference discovery
1931 // and processing during marking
1932
1933 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1934 HeapWord* addr = (HeapWord*)obj;
1935 return addr != NULL &&
1936 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1937 }
1938
1939 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1940 // Uses the CMTask associated with a worker thread (for serial reference
1941 // processing the CMTask for worker 0 is used) to preserve (mark) and
1942 // trace referent objects.
1943 //
1944 // Using the CMTask and embedded local queues avoids having the worker
1945 // threads operating on the global mark stack. This reduces the risk
1946 // of overflowing the stack - which we would rather avoid at this late
1947 // stage. Also using the tasks' local queues removes the potential
1948 // for the workers to interfere with each other, which could occur if
1949 // operating on the global stack.
1950
1951 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1952 ConcurrentMark* _cm;
1953 CMTask* _task;
1954 int _ref_counter_limit;
1955 int _ref_counter;
1956 bool _is_serial;
1957 public:
1958 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1959 _cm(cm), _task(task), _is_serial(is_serial),
1960 _ref_counter_limit(G1RefProcDrainInterval) {
1961 assert(_ref_counter_limit > 0, "sanity");
1962 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1963 _ref_counter = _ref_counter_limit;
1964 }
1965
1966 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1967 virtual void do_oop( oop* p) { do_oop_work(p); }
1968
1969 template <class T> void do_oop_work(T* p) {
1970 if (!_cm->has_overflown()) {
1971 oop obj = oopDesc::load_decode_heap_oop(p);
1972 _task->deal_with_reference(obj);
1973 _ref_counter--;
1974
1975 if (_ref_counter == 0) {
1976 // We have dealt with _ref_counter_limit references, pushing them
1977 // and objects reachable from them on to the local stack (and
1978 // possibly the global stack). Call CMTask::do_marking_step() to
1979 // process these entries.
1980 //
1981 // We call CMTask::do_marking_step() in a loop, which we'll exit if
1982 // there's nothing more to do (i.e. we're done with the entries that
1983 // were pushed as a result of the CMTask::deal_with_reference() calls
1984 // above) or we overflow.
1985 //
1986 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1987 // flag while there may still be some work to do. (See the comment at
1988 // the beginning of CMTask::do_marking_step() for those conditions -
1989 // one of which is reaching the specified time target.) It is only
1990 // when CMTask::do_marking_step() returns without setting the
1991 // has_aborted() flag that the marking step has completed.
1992 do {
1993 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1994 _task->do_marking_step(mark_step_duration_ms,
1995 false /* do_termination */,
1996 _is_serial);
1997 } while (_task->has_aborted() && !_cm->has_overflown());
1998 _ref_counter = _ref_counter_limit;
1999 }
2000 }
2001 }
2002 };
2003
2004 // 'Drain' oop closure used by both serial and parallel reference processing.
2005 // Uses the CMTask associated with a given worker thread (for serial
2006 // reference processing the CMTask for worker 0 is used). Calls the
2007 // do_marking_step routine, with an unbelievably large timeout value,
2008 // to drain the marking data structures of the remaining entries
2009 // added by the 'keep alive' oop closure above.
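//
// As a hedged sketch of how the closures above and below get used (the
// driver loop and "referent_addr" are hypothetical; the real driver is
// the reference processor invoked from weakRefsWork() further down):
//
//   G1CMKeepAliveAndDrainClosure keep_alive(cm, task, is_serial);
//   G1CMDrainMarkingStackClosure drain(cm, task, is_serial);
//   // for each discovered, still-live reference:
//   //   keep_alive.do_oop(referent_addr);  // mark and trace the referent
//   // once a discovered list has been processed:
//   //   drain.do_void();                   // drain local queue + global stack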
2010
2011 class G1CMDrainMarkingStackClosure: public VoidClosure {
2012 ConcurrentMark* _cm;
2013 CMTask* _task;
2014 bool _is_serial;
2015 public:
2016 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2017 _cm(cm), _task(task), _is_serial(is_serial) {
2018 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2019 }
2020
2021 void do_void() {
2022 do {
2023 // We call CMTask::do_marking_step() to completely drain the local
2024 // and global marking stacks of entries pushed by the 'keep alive'
2025 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2026 //
2027 // CMTask::do_marking_step() is called in a loop, which we'll exit
2028 // if there's nothing more to do (i.e. we've completely drained the
2029 // entries that were pushed as a result of applying the 'keep alive'
2030 // closure to the entries on the discovered ref lists) or we overflow
2031 // the global marking stack.
2032 //
2033 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2034 // flag while there may still be some work to do. (See the comment at
2035 // the beginning of CMTask::do_marking_step() for those conditions -
2036 // one of which is reaching the specified time target.) It is only
2037 // when CMTask::do_marking_step() returns without setting the
2038 // has_aborted() flag that the marking step has completed.
2039
2040 _task->do_marking_step(1000000000.0 /* something very large */,
2041 true /* do_termination */,
2042 _is_serial);
2043 } while (_task->has_aborted() && !_cm->has_overflown());
2044 }
2045 };
2046
2047 // Implementation of AbstractRefProcTaskExecutor for parallel
2048 // reference processing at the end of G1 concurrent marking
2049
2050 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2051 private:
2052 G1CollectedHeap* _g1h;
2053 ConcurrentMark* _cm;
2054 WorkGang* _workers;
2055 uint _active_workers;
2056
2057 public:
2058 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2059 ConcurrentMark* cm,
2060 WorkGang* workers,
2061 uint n_workers) :
2062 _g1h(g1h), _cm(cm),
2063 _workers(workers), _active_workers(n_workers) { }
2064
2065 // Executes the given task using concurrent marking worker threads.
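// (Sketch of the intended use, mirroring the reference-processing code
// in weakRefsWork() below; "rp" stands for the concurrent-marking
// ReferenceProcessor and "n" for an active worker count:)
//
//   G1CMRefProcTaskExecutor par_task_executor(g1h, cm, g1h->workers(), n);
//   rp->process_discovered_references(&is_alive, &keep_alive, &drain,
//                                     &par_task_executor, gc_timer);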
2066 virtual void execute(ProcessTask& task);
2067 virtual void execute(EnqueueTask& task);
2068 };
2069
2070 class G1CMRefProcTaskProxy: public AbstractGangTask {
2071 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2072 ProcessTask& _proc_task;
2073 G1CollectedHeap* _g1h;
2074 ConcurrentMark* _cm;
2075
2076 public:
2077 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2078 G1CollectedHeap* g1h,
2079 ConcurrentMark* cm) :
2080 AbstractGangTask("Process reference objects in parallel"),
2081 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2082 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2083 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2084 }
2085
2086 virtual void work(uint worker_id) {
2087 ResourceMark rm;
2088 HandleMark hm;
2089 CMTask* task = _cm->task(worker_id);
2090 G1CMIsAliveClosure g1_is_alive(_g1h);
2091 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2092 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2093
2094 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2095 }
2096 };
2097
2098 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2099 assert(_workers != NULL, "Need parallel worker threads.");
2100 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2101
2102 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2103
2104 // We need to reset the concurrency level before each
2105 // proxy task execution, so that the termination protocol
2106 // and overflow handling in CMTask::do_marking_step() know
2107 // how many workers to wait for.
2108 _cm->set_concurrency(_active_workers);
2109 _workers->run_task(&proc_task_proxy);
2110 }
2111
2112 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2113 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2114 EnqueueTask& _enq_task;
2115
2116 public:
2117 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2118 AbstractGangTask("Enqueue reference objects in parallel"),
2119 _enq_task(enq_task) { }
2120
2121 virtual void work(uint worker_id) {
2122 _enq_task.work(worker_id);
2123 }
2124 };
2125
2126 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2127 assert(_workers != NULL, "Need parallel worker threads.");
2128 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2129
2130 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2131
2132 // Not strictly necessary but...
2133 //
2134 // We need to reset the concurrency level before each
2135 // proxy task execution, so that the termination protocol
2136 // and overflow handling in CMTask::do_marking_step() know
2137 // how many workers to wait for.
2138 _cm->set_concurrency(_active_workers);
2139 _workers->run_task(&enq_task_proxy);
2140 }
2141
2142 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2143 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2144 }
2145
2146 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2147 if (has_overflown()) {
2148 // Skip processing the discovered references if we have
2149 // overflown the global marking stack. Reference objects
2150 // only get discovered once so it is OK to not
2151 // de-populate the discovered reference lists. We could have,
2152 // but the only benefit would be that, when marking restarts,
2153 // fewer reference objects are discovered.
2154 return;
2155 }
2156
2157 ResourceMark rm;
2158 HandleMark hm;
2159
2160 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2161
2162 // Is alive closure.
2163 G1CMIsAliveClosure g1_is_alive(g1h);
2164
2165 // Inner scope to exclude the cleaning of the string and symbol
2166 // tables from the displayed time.
2167 {
2168 G1CMTraceTime t("GC ref-proc", G1Log::finer());
2169
2170 ReferenceProcessor* rp = g1h->ref_processor_cm();
2171
2172 // See the comment in G1CollectedHeap::ref_processing_init()
2173 // about how reference processing currently works in G1.
2174
2175 // Set the soft reference policy
2176 rp->setup_policy(clear_all_soft_refs);
2177 assert(_markStack.isEmpty(), "mark stack should be empty");
2178
2179 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2180 // in serial reference processing. Note these closures are also
2181 // used for serially processing (by the current thread) the
2182 // JNI references during parallel reference processing.
2183 //
2184 // These closures do not need to synchronize with the worker
2185 // threads involved in parallel reference processing as these
2186 // instances are executed serially by the current thread (e.g.
2187 // reference processing is not multi-threaded and is thus
2188 // performed by the current thread instead of a gang worker).
2189 //
2190 // The gang tasks involved in parallel reference processing create
2191 // their own instances of these closures, which do their own
2192 // synchronization among themselves.
2193 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2194 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2195
2196 // We need at least one active thread. If reference processing
2197 // is not multi-threaded we use the current (VMThread) thread,
2198 // otherwise we use the work gang from the G1CollectedHeap and
2199 // we utilize all the worker threads we can.
2200 bool processing_is_mt = rp->processing_is_mt();
2201 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2202 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2203
2204 // Parallel processing task executor.
2205 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2206 g1h->workers(), active_workers);
2207 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2208
2209 // Set the concurrency level. The phase was already set prior to
2210 // executing the remark task.
2211 set_concurrency(active_workers);
2212
2213 // Set the degree of MT processing here. If the discovery was done MT,
2214 // the number of threads involved during discovery could differ from
2215 // the number of active workers. This is OK as long as the discovered
2216 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2217 rp->set_active_mt_degree(active_workers);
2218
2219 // Process the weak references.
2220 const ReferenceProcessorStats& stats =
2221 rp->process_discovered_references(&g1_is_alive,
2222 &g1_keep_alive,
2223 &g1_drain_mark_stack,
2224 executor,
2225 g1h->gc_timer_cm());
2226 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2227
2228 // The do_oop work routines of the keep_alive and drain_marking_stack
2229 // oop closures will set the has_overflown flag if we overflow the
2230 // global marking stack.
2231 2232 assert(_markStack.overflow() || _markStack.isEmpty(), 2233 "mark stack should be empty (unless it overflowed)"); 2234 2235 if (_markStack.overflow()) { 2236 // This should have been done already when we tried to push an 2237 // entry on to the global mark stack. But let's do it again. 2238 set_has_overflown(); 2239 } 2240 2241 assert(rp->num_q() == active_workers, "why not"); 2242 2243 rp->enqueue_discovered_references(executor); 2244 2245 rp->verify_no_references_recorded(); 2246 assert(!rp->discovery_enabled(), "Post condition"); 2247 } 2248 2249 if (has_overflown()) { 2250 // We can not trust g1_is_alive if the marking stack overflowed 2251 return; 2252 } 2253 2254 assert(_markStack.isEmpty(), "Marking should have completed"); 2255 2256 // Unload Klasses, String, Symbols, Code Cache, etc. 2257 { 2258 G1CMTraceTime trace("Unloading", G1Log::finer()); 2259 2260 if (ClassUnloadingWithConcurrentMark) { 2261 bool purged_classes; 2262 2263 { 2264 G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2265 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2266 } 2267 2268 { 2269 G1CMTraceTime trace("Parallel Unloading", G1Log::finest()); 2270 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2271 } 2272 } 2273 2274 if (G1StringDedup::is_enabled()) { 2275 G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2276 G1StringDedup::unlink(&g1_is_alive); 2277 } 2278 } 2279 } 2280 2281 void ConcurrentMark::swapMarkBitMaps() { 2282 CMBitMapRO* temp = _prevMarkBitMap; 2283 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2284 _nextMarkBitMap = (CMBitMap*) temp; 2285 } 2286 2287 // Closure for marking entries in SATB buffers. 2288 class CMSATBBufferClosure : public SATBBufferClosure { 2289 private: 2290 CMTask* _task; 2291 G1CollectedHeap* _g1h; 2292 2293 // This is very similar to CMTask::deal_with_reference, but with 2294 // more relaxed requirements for the argument, so this must be more 2295 // circumspect about treating the argument as an object. 2296 void do_entry(void* entry) const { 2297 _task->increment_refs_reached(); 2298 HeapRegion* hr = _g1h->heap_region_containing(entry); 2299 if (entry < hr->next_top_at_mark_start()) { 2300 // Until we get here, we don't know whether entry refers to a valid 2301 // object; it could instead have been a stale reference. 
2302 oop obj = static_cast<oop>(entry);
2303 assert(obj->is_oop(true /* ignore mark word */),
2304 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2305 _task->make_reference_grey(obj, hr);
2306 }
2307 }
2308
2309 public:
2310 CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2311 : _task(task), _g1h(g1h) { }
2312
2313 virtual void do_buffer(void** buffer, size_t size) {
2314 for (size_t i = 0; i < size; ++i) {
2315 do_entry(buffer[i]);
2316 }
2317 }
2318 };
2319
2320 class G1RemarkThreadsClosure : public ThreadClosure {
2321 CMSATBBufferClosure _cm_satb_cl;
2322 G1CMOopClosure _cm_cl;
2323 MarkingCodeBlobClosure _code_cl;
2324 int _thread_parity;
2325
2326 public:
2327 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2328 _cm_satb_cl(task, g1h),
2329 _cm_cl(g1h, g1h->concurrent_mark(), task),
2330 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2331 _thread_parity(Threads::thread_claim_parity()) {}
2332
2333 void do_thread(Thread* thread) {
2334 if (thread->is_Java_thread()) {
2335 if (thread->claim_oops_do(true, _thread_parity)) {
2336 JavaThread* jt = (JavaThread*)thread;
2337
2338 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2339 // however, oops reachable from nmethods have very complex lifecycles:
2340 // * Alive if on the stack of an executing method
2341 // * Weakly reachable otherwise
2342 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2343 // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2344 jt->nmethods_do(&_code_cl);
2345
2346 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2347 }
2348 } else if (thread->is_VM_thread()) {
2349 if (thread->claim_oops_do(true, _thread_parity)) {
2350 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2351 }
2352 }
2353 }
2354 };
2355
2356 class CMRemarkTask: public AbstractGangTask {
2357 private:
2358 ConcurrentMark* _cm;
2359 public:
2360 void work(uint worker_id) {
2361 // Since all available tasks are actually started, we should
2362 // only proceed if we're supposed to be active.
2363 if (worker_id < _cm->active_tasks()) {
2364 CMTask* task = _cm->task(worker_id);
2365 task->record_start_time();
2366 {
2367 ResourceMark rm;
2368 HandleMark hm;
2369
2370 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2371 Threads::threads_do(&threads_f);
2372 }
2373
2374 do {
2375 task->do_marking_step(1000000000.0 /* something very large */,
2376 true /* do_termination */,
2377 false /* is_serial */);
2378 } while (task->has_aborted() && !_cm->has_overflown());
2379 // If we overflow, then we do not want to restart. We instead
2380 // want to abort remark and do concurrent marking again.
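// Spelled out (an illustrative restatement of the exit conditions of
// the loop above, not additional logic):
//
//   !task->has_aborted()  -> the marking step ran to completion;
//                            this worker's remark work is done
//   _cm->has_overflown()  -> global stack overflow; remark is abandoned
//                            and concurrent marking will be restarted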
2381 task->record_end_time();
2382 }
2383 }
2384
2385 CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2386 AbstractGangTask("Par Remark"), _cm(cm) {
2387 _cm->terminator()->reset_for_reuse(active_workers);
2388 }
2389 };
2390
2391 void ConcurrentMark::checkpointRootsFinalWork() {
2392 ResourceMark rm;
2393 HandleMark hm;
2394 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2395
2396 G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2397
2398 g1h->ensure_parsability(false);
2399
2400 // this is remark, so we'll use up all active threads
2401 uint active_workers = g1h->workers()->active_workers();
2402 set_concurrency_and_phase(active_workers, false /* concurrent */);
2403 // Leave _parallel_marking_threads at its
2404 // value originally calculated in the ConcurrentMark
2405 // constructor and pass values of the active workers
2406 // through the gang in the task.
2407
2408 {
2409 StrongRootsScope srs(active_workers);
2410
2411 CMRemarkTask remarkTask(this, active_workers);
2412 // We will start all available threads, even if we decide that the
2413 // active_workers will be fewer. The extra ones will just bail out
2414 // immediately.
2415 g1h->workers()->run_task(&remarkTask);
2416 }
2417
2418 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2419 guarantee(has_overflown() ||
2420 satb_mq_set.completed_buffers_num() == 0,
2421 "Invariant: has_overflown = %s, num buffers = %d",
2422 BOOL_TO_STR(has_overflown()),
2423 satb_mq_set.completed_buffers_num());
2424
2425 print_stats();
2426 }
2427
2428 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2429 // Note we are overriding the read-only view of the prev map here, via
2430 // the cast.
2431 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2432 }
2433
2434 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2435 _nextMarkBitMap->clearRange(mr);
2436 }
2437
2438 HeapRegion*
2439 ConcurrentMark::claim_region(uint worker_id) {
2440 // "checkpoint" the finger
2441 HeapWord* finger = _finger;
2442
2443 // _heap_end will not change underneath our feet; it only changes at
2444 // yield points.
2445 while (finger < _heap_end) {
2446 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2447
2448 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2449
2450 // Above heap_region_containing may return NULL as we always scan and claim
2451 // until the end of the heap. In this case, just jump to the next region.
2452 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2453
2454 // Is the gap between reading the finger and doing the CAS too long?
2455 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2456 if (res == finger && curr_region != NULL) {
2457 // we succeeded
2458 HeapWord* bottom = curr_region->bottom();
2459 HeapWord* limit = curr_region->next_top_at_mark_start();
2460
2461 // notice that _finger == end cannot be guaranteed here since
2462 // someone else might have moved the finger even further
2463 assert(_finger >= end, "the finger should have moved forward");
2464
2465 if (limit > bottom) {
2466 return curr_region;
2467 } else {
2468 assert(limit == bottom,
2469 "the region limit should be at bottom");
2470 // we return NULL and the caller should try calling
2471 // claim_region() again.
2472 return NULL; 2473 } 2474 } else { 2475 assert(_finger > finger, "the finger should have moved forward"); 2476 // read it again 2477 finger = _finger; 2478 } 2479 } 2480 2481 return NULL; 2482 } 2483 2484 #ifndef PRODUCT 2485 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2486 private: 2487 G1CollectedHeap* _g1h; 2488 const char* _phase; 2489 int _info; 2490 2491 public: 2492 VerifyNoCSetOops(const char* phase, int info = -1) : 2493 _g1h(G1CollectedHeap::heap()), 2494 _phase(phase), 2495 _info(info) 2496 { } 2497 2498 void operator()(oop obj) const { 2499 guarantee(obj->is_oop(), 2500 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2501 p2i(obj), _phase, _info); 2502 guarantee(!_g1h->obj_in_cs(obj), 2503 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2504 p2i(obj), _phase, _info); 2505 } 2506 }; 2507 2508 void ConcurrentMark::verify_no_cset_oops() { 2509 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2510 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2511 return; 2512 } 2513 2514 // Verify entries on the global mark stack 2515 _markStack.iterate(VerifyNoCSetOops("Stack")); 2516 2517 // Verify entries on the task queues 2518 for (uint i = 0; i < _max_worker_id; ++i) { 2519 CMTaskQueue* queue = _task_queues->queue(i); 2520 queue->iterate(VerifyNoCSetOops("Queue", i)); 2521 } 2522 2523 // Verify the global finger 2524 HeapWord* global_finger = finger(); 2525 if (global_finger != NULL && global_finger < _heap_end) { 2526 // Since we always iterate over all regions, we might get a NULL HeapRegion 2527 // here. 2528 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2529 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2530 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2531 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2532 } 2533 2534 // Verify the task fingers 2535 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2536 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2537 CMTask* task = _tasks[i]; 2538 HeapWord* task_finger = task->finger(); 2539 if (task_finger != NULL && task_finger < _heap_end) { 2540 // See above note on the global finger verification. 2541 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2542 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2543 !task_hr->in_collection_set(), 2544 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2545 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2546 } 2547 } 2548 } 2549 #endif // PRODUCT 2550 2551 // Aggregate the counting data that was constructed concurrently 2552 // with marking. 
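//
// Conceptually (an illustrative summary, not extra logic), for each
// region hr the closure below computes
//
//   marked_bytes(hr) = sum over workers w of
//                      count_marked_bytes_array_for(w)[hr->hrm_index()]
//
// and ORs each worker's card bitmap, restricted to the cards spanned
// by [bottom, ntams), into the global card bitmap.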
2553 class AggregateCountDataHRClosure: public HeapRegionClosure {
2554 G1CollectedHeap* _g1h;
2555 ConcurrentMark* _cm;
2556 CardTableModRefBS* _ct_bs;
2557 BitMap* _cm_card_bm;
2558 uint _max_worker_id;
2559
2560 public:
2561 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2562 BitMap* cm_card_bm,
2563 uint max_worker_id) :
2564 _g1h(g1h), _cm(g1h->concurrent_mark()),
2565 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2566 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2567
2568 bool doHeapRegion(HeapRegion* hr) {
2569 HeapWord* start = hr->bottom();
2570 HeapWord* limit = hr->next_top_at_mark_start();
2571 HeapWord* end = hr->end();
2572
2573 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2574 "Preconditions not met - "
2575 "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2576 "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2577 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2578
2579 assert(hr->next_marked_bytes() == 0, "Precondition");
2580
2581 if (start == limit) {
2582 // NTAMS of this region has not been set so nothing to do.
2583 return false;
2584 }
2585
2586 // 'start' should be in the heap.
2587 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2588 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2589 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2590
2591 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2592 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2593 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2594
2595 // If ntams is not card aligned then we bump the card bitmap index
2596 // for limit so that we get all the cards spanned by
2597 // the object ending at ntams.
2598 // Note: if this is the last region in the heap then ntams
2599 // could actually be just beyond the end of the heap;
2600 // limit_idx will then correspond to a (non-existent) card
2601 // that is also outside the heap.
2602 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2603 limit_idx += 1;
2604 }
2605
2606 assert(limit_idx <= end_idx, "or else use atomics");
2607
2608 // Aggregate the "stripe" in the count data associated with hr.
2609 uint hrm_index = hr->hrm_index();
2610 size_t marked_bytes = 0;
2611
2612 for (uint i = 0; i < _max_worker_id; i += 1) {
2613 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2614 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2615
2616 // Fetch the marked_bytes in this region for task i and
2617 // add it to the running total for this region.
2618 marked_bytes += marked_bytes_array[hrm_index];
2619
2620 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2621 // into the global card bitmap.
2622 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2623
2624 while (scan_idx < limit_idx) {
2625 assert(task_card_bm->at(scan_idx) == true, "should be");
2626 _cm_card_bm->set_bit(scan_idx);
2627 assert(_cm_card_bm->at(scan_idx) == true, "should be");
2628
2629 // BitMap::get_next_one_offset() can handle the case when
2630 // its left_offset parameter is greater than its right_offset
2631 // parameter. It does, however, have an early exit if
2632 // left_offset == right_offset. So let's limit the value
2633 // passed in for left offset here.
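// For example (illustrative): when scan_idx == limit_idx - 1, passing
// scan_idx + 1 == limit_idx hits that early exit and the walk ends;
// the MIN2 below merely keeps the left offset from exceeding limit_idx.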
2634 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2635 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2636 } 2637 } 2638 2639 // Update the marked bytes for this region. 2640 hr->add_to_marked_bytes(marked_bytes); 2641 2642 // Next heap region 2643 return false; 2644 } 2645 }; 2646 2647 class G1AggregateCountDataTask: public AbstractGangTask { 2648 protected: 2649 G1CollectedHeap* _g1h; 2650 ConcurrentMark* _cm; 2651 BitMap* _cm_card_bm; 2652 uint _max_worker_id; 2653 uint _active_workers; 2654 HeapRegionClaimer _hrclaimer; 2655 2656 public: 2657 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2658 ConcurrentMark* cm, 2659 BitMap* cm_card_bm, 2660 uint max_worker_id, 2661 uint n_workers) : 2662 AbstractGangTask("Count Aggregation"), 2663 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2664 _max_worker_id(max_worker_id), 2665 _active_workers(n_workers), 2666 _hrclaimer(_active_workers) { 2667 } 2668 2669 void work(uint worker_id) { 2670 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2671 2672 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2673 } 2674 }; 2675 2676 2677 void ConcurrentMark::aggregate_count_data() { 2678 uint n_workers = _g1h->workers()->active_workers(); 2679 2680 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2681 _max_worker_id, n_workers); 2682 2683 _g1h->workers()->run_task(&g1_par_agg_task); 2684 } 2685 2686 // Clear the per-worker arrays used to store the per-region counting data 2687 void ConcurrentMark::clear_all_count_data() { 2688 // Clear the global card bitmap - it will be filled during 2689 // liveness count aggregation (during remark) and the 2690 // final counting task. 2691 _card_bm.clear(); 2692 2693 // Clear the global region bitmap - it will be filled as part 2694 // of the final counting task. 2695 _region_bm.clear(); 2696 2697 uint max_regions = _g1h->max_regions(); 2698 assert(_max_worker_id > 0, "uninitialized"); 2699 2700 for (uint i = 0; i < _max_worker_id; i += 1) { 2701 BitMap* task_card_bm = count_card_bitmap_for(i); 2702 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2703 2704 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2705 assert(marked_bytes_array != NULL, "uninitialized"); 2706 2707 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2708 task_card_bm->clear(); 2709 } 2710 } 2711 2712 void ConcurrentMark::print_stats() { 2713 if (G1MarkingVerboseLevel > 0) { 2714 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2715 for (size_t i = 0; i < _active_tasks; ++i) { 2716 _tasks[i]->print_stats(); 2717 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2718 } 2719 } 2720 } 2721 2722 // abandon current marking iteration due to a Full GC 2723 void ConcurrentMark::abort() { 2724 if (!cmThread()->during_cycle() || _has_aborted) { 2725 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2726 return; 2727 } 2728 2729 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2730 // concurrent bitmap clearing. 2731 _nextMarkBitMap->clearAll(); 2732 2733 // Note we cannot clear the previous marking bitmap here 2734 // since VerifyDuringGC verifies the objects marked during 2735 // a full GC against the previous bitmap. 
2736 2737 // Clear the liveness counting data 2738 clear_all_count_data(); 2739 // Empty mark stack 2740 reset_marking_state(); 2741 for (uint i = 0; i < _max_worker_id; ++i) { 2742 _tasks[i]->clear_region_fields(); 2743 } 2744 _first_overflow_barrier_sync.abort(); 2745 _second_overflow_barrier_sync.abort(); 2746 _has_aborted = true; 2747 2748 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2749 satb_mq_set.abandon_partial_marking(); 2750 // This can be called either during or outside marking, we'll read 2751 // the expected_active value from the SATB queue set. 2752 satb_mq_set.set_active_all_threads( 2753 false, /* new active value */ 2754 satb_mq_set.is_active() /* expected_active */); 2755 2756 _g1h->trace_heap_after_concurrent_cycle(); 2757 _g1h->register_concurrent_cycle_end(); 2758 } 2759 2760 static void print_ms_time_info(const char* prefix, const char* name, 2761 NumberSeq& ns) { 2762 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2763 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2764 if (ns.num() > 0) { 2765 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2766 prefix, ns.sd(), ns.maximum()); 2767 } 2768 } 2769 2770 void ConcurrentMark::print_summary_info() { 2771 gclog_or_tty->print_cr(" Concurrent marking:"); 2772 print_ms_time_info(" ", "init marks", _init_times); 2773 print_ms_time_info(" ", "remarks", _remark_times); 2774 { 2775 print_ms_time_info(" ", "final marks", _remark_mark_times); 2776 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2777 2778 } 2779 print_ms_time_info(" ", "cleanups", _cleanup_times); 2780 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2781 _total_counting_time, 2782 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 2783 (double)_cleanup_times.num() 2784 : 0.0)); 2785 if (G1ScrubRemSets) { 2786 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2787 _total_rs_scrub_time, 2788 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 2789 (double)_cleanup_times.num() 2790 : 0.0)); 2791 } 2792 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 2793 (_init_times.sum() + _remark_times.sum() + 2794 _cleanup_times.sum())/1000.0); 2795 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 2796 "(%8.2f s marking).", 2797 cmThread()->vtime_accum(), 2798 cmThread()->vtime_mark_accum()); 2799 } 2800 2801 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2802 _parallel_workers->print_worker_threads_on(st); 2803 } 2804 2805 void ConcurrentMark::print_on_error(outputStream* st) const { 2806 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2807 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2808 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2809 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2810 } 2811 2812 // We take a break if someone is trying to stop the world. 
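// A sketch of the expected calling pattern (illustrative only; the
// real callers are the concurrent phases, e.g. the closures driven by
// the marking threads):
//
//   if (cm->do_yield_check(worker_id)) {
//     // we yielded for a safepoint; cached heap state may be stale
//   }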
2813 bool ConcurrentMark::do_yield_check(uint worker_id) { 2814 if (SuspendibleThreadSet::should_yield()) { 2815 if (worker_id == 0) { 2816 _g1h->g1_policy()->record_concurrent_pause(); 2817 } 2818 SuspendibleThreadSet::yield(); 2819 return true; 2820 } else { 2821 return false; 2822 } 2823 } 2824 2825 // Closure for iteration over bitmaps 2826 class CMBitMapClosure : public BitMapClosure { 2827 private: 2828 // the bitmap that is being iterated over 2829 CMBitMap* _nextMarkBitMap; 2830 ConcurrentMark* _cm; 2831 CMTask* _task; 2832 2833 public: 2834 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 2835 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2836 2837 bool do_bit(size_t offset) { 2838 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2839 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2840 assert( addr < _cm->finger(), "invariant"); 2841 assert(addr >= _task->finger(), "invariant"); 2842 2843 // We move that task's local finger along. 2844 _task->move_finger_to(addr); 2845 2846 _task->scan_object(oop(addr)); 2847 // we only partially drain the local queue and global stack 2848 _task->drain_local_queue(true); 2849 _task->drain_global_stack(true); 2850 2851 // if the has_aborted flag has been raised, we need to bail out of 2852 // the iteration 2853 return !_task->has_aborted(); 2854 } 2855 }; 2856 2857 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2858 ReferenceProcessor* result = NULL; 2859 if (G1UseConcMarkReferenceProcessing) { 2860 result = g1h->ref_processor_cm(); 2861 assert(result != NULL, "should not be NULL"); 2862 } 2863 return result; 2864 } 2865 2866 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2867 ConcurrentMark* cm, 2868 CMTask* task) 2869 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2870 _g1h(g1h), _cm(cm), _task(task) 2871 { } 2872 2873 void CMTask::setup_for_region(HeapRegion* hr) { 2874 assert(hr != NULL, 2875 "claim_region() should have filtered out NULL regions"); 2876 _curr_region = hr; 2877 _finger = hr->bottom(); 2878 update_region_limit(); 2879 } 2880 2881 void CMTask::update_region_limit() { 2882 HeapRegion* hr = _curr_region; 2883 HeapWord* bottom = hr->bottom(); 2884 HeapWord* limit = hr->next_top_at_mark_start(); 2885 2886 if (limit == bottom) { 2887 // The region was collected underneath our feet. 2888 // We set the finger to bottom to ensure that the bitmap 2889 // iteration that will follow this will not do anything. 2890 // (this is not a condition that holds when we set the region up, 2891 // as the region is not supposed to be empty in the first place) 2892 _finger = bottom; 2893 } else if (limit >= _region_limit) { 2894 assert(limit >= _finger, "peace of mind"); 2895 } else { 2896 assert(limit < _region_limit, "only way to get here"); 2897 // This can happen under some pretty unusual circumstances. An 2898 // evacuation pause empties the region underneath our feet (NTAMS 2899 // at bottom). We then do some allocation in the region (NTAMS 2900 // stays at bottom), followed by the region being used as a GC 2901 // alloc region (NTAMS will move to top() and the objects 2902 // originally below it will be grayed). All objects now marked in 2903 // the region are explicitly grayed, if below the global finger, 2904 // and we do not need in fact to scan anything else. So, we simply 2905 // set _finger to be limit to ensure that the bitmap iteration 2906 // doesn't do anything. 
2907 _finger = limit;
2908 }
2909
2910 _region_limit = limit;
2911 }
2912
2913 void CMTask::giveup_current_region() {
2914 assert(_curr_region != NULL, "invariant");
2915 clear_region_fields();
2916 }
2917
2918 void CMTask::clear_region_fields() {
2919 // Values for these three fields that indicate that we're not
2920 // holding on to a region.
2921 _curr_region = NULL;
2922 _finger = NULL;
2923 _region_limit = NULL;
2924 }
2925
2926 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2927 if (cm_oop_closure == NULL) {
2928 assert(_cm_oop_closure != NULL, "invariant");
2929 } else {
2930 assert(_cm_oop_closure == NULL, "invariant");
2931 }
2932 _cm_oop_closure = cm_oop_closure;
2933 }
2934
2935 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2936 guarantee(nextMarkBitMap != NULL, "invariant");
2937 _nextMarkBitMap = nextMarkBitMap;
2938 clear_region_fields();
2939
2940 _calls = 0;
2941 _elapsed_time_ms = 0.0;
2942 _termination_time_ms = 0.0;
2943 _termination_start_time_ms = 0.0;
2944 }
2945
2946 bool CMTask::should_exit_termination() {
2947 regular_clock_call();
2948 // This is called when we are in the termination protocol. We should
2949 // quit if, for some reason, this task wants to abort or the global
2950 // stack is not empty (this means that we can get work from it).
2951 return !_cm->mark_stack_empty() || has_aborted();
2952 }
2953
2954 void CMTask::reached_limit() {
2955 assert(_words_scanned >= _words_scanned_limit ||
2956 _refs_reached >= _refs_reached_limit,
2957 "shouldn't have been called otherwise");
2958 regular_clock_call();
2959 }
2960
2961 void CMTask::regular_clock_call() {
2962 if (has_aborted()) return;
2963
2964 // First, we need to recalculate the words scanned and refs reached
2965 // limits for the next clock call.
2966 recalculate_limits();
2967
2968 // During the regular clock call we do the following
2969
2970 // (1) If an overflow has been flagged, then we abort.
2971 if (_cm->has_overflown()) {
2972 set_has_aborted();
2973 return;
2974 }
2975
2976 // If we are not concurrent (i.e. we're doing remark) we don't need
2977 // to check anything else. The other steps are only needed during
2978 // the concurrent marking phase.
2979 if (!concurrent()) return;
2980
2981 // (2) If marking has been aborted for Full GC, then we also abort.
2982 if (_cm->has_aborted()) {
2983 set_has_aborted();
2984 return;
2985 }
2986
2987 double curr_time_ms = os::elapsedVTime() * 1000.0;
2988
2989 // (3) We check whether we should yield. If we have to, then we abort.
2990 if (SuspendibleThreadSet::should_yield()) {
2991 // We should yield. To do this we abort the task. The caller is
2992 // responsible for yielding.
2993 set_has_aborted();
2994 return;
2995 }
2996
2997 // (4) We check whether we've reached our time quota. If we have,
2998 // then we abort.
2999 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3000 if (elapsed_time_ms > _time_target_ms) {
3001 set_has_aborted();
3002 _has_timed_out = true;
3003 return;
3004 }
3005
3006 // (5) Finally, we check whether there are enough completed SATB
3007 // buffers available for processing. If there are, we abort.
3008 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3009 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3010 // we do need to process SATB buffers, so we'll abort and restart
3011 // the marking task to do so
3012 set_has_aborted();
3013 return;
3014 }
3015 }
3016
3017 void CMTask::recalculate_limits() {
3018 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3019 _words_scanned_limit = _real_words_scanned_limit;
3020
3021 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3022 _refs_reached_limit = _real_refs_reached_limit;
3023 }
3024
3025 void CMTask::decrease_limits() {
3026 // This is called when we believe that we're going to do an infrequent
3027 // operation which will increase the per byte scanned cost (i.e. move
3028 // entries to/from the global stack). It basically tries to decrease the
3029 // scanning limit so that the clock is called earlier.
3030
3031 _words_scanned_limit = _real_words_scanned_limit -
3032 3 * words_scanned_period / 4;
3033 _refs_reached_limit = _real_refs_reached_limit -
3034 3 * refs_reached_period / 4;
3035 }
3036
3037 void CMTask::move_entries_to_global_stack() {
3038 // local array where we'll store the entries that will be popped
3039 // from the local queue
3040 oop buffer[global_stack_transfer_size];
3041
3042 int n = 0;
3043 oop obj;
3044 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3045 buffer[n] = obj;
3046 ++n;
3047 }
3048
3049 if (n > 0) {
3050 // we popped at least one entry from the local queue
3051
3052 if (!_cm->mark_stack_push(buffer, n)) {
3053 set_has_aborted();
3054 }
3055 }
3056
3057 // this operation was quite expensive, so decrease the limits
3058 decrease_limits();
3059 }
3060
3061 void CMTask::get_entries_from_global_stack() {
3062 // local array where we'll store the entries that will be popped
3063 // from the global stack.
3064 oop buffer[global_stack_transfer_size];
3065 int n;
3066 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3067 assert(n <= global_stack_transfer_size,
3068 "we should not pop more than the given limit");
3069 if (n > 0) {
3070 // yes, we did actually pop at least one entry
3071 for (int i = 0; i < n; ++i) {
3072 bool success = _task_queue->push(buffer[i]);
3073 // We only call this when the local queue is empty or under a
3074 // given target limit. So, we do not expect this push to fail.
3075 assert(success, "invariant");
3076 }
3077 }
3078
3079 // this operation was quite expensive, so decrease the limits
3080 decrease_limits();
3081 }
3082
3083 void CMTask::drain_local_queue(bool partially) {
3084 if (has_aborted()) return;
3085
3086 // Decide what the target size is, depending on whether we're going to
3087 // drain it partially (so that other tasks can steal if they run out
3088 // of things to do) or totally (at the very end).
3089 size_t target_size;
3090 if (partially) {
3091 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3092 } else {
3093 target_size = 0;
3094 }
3095
3096 if (_task_queue->size() > target_size) {
3097 oop obj;
3098 bool ret = _task_queue->pop_local(obj);
3099 while (ret) {
3100 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
3101 assert(!_g1h->is_on_master_free_list(
3102 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3103
3104 scan_object(obj);
3105
3106 if (_task_queue->size() <= target_size || has_aborted()) {
3107 ret = false;
3108 } else {
3109 ret = _task_queue->pop_local(obj);
3110 }
3111 }
3112 }
3113 }
3114
3115 void CMTask::drain_global_stack(bool partially) {
3116 if (has_aborted()) return;
3117
3118 // We have a policy to drain the local queue before we attempt to
3119 // drain the global stack.
3120 assert(partially || _task_queue->size() == 0, "invariant");
3121
3122 // Decide what the target size is, depending on whether we're going to
3123 // drain it partially (so that other tasks can steal if they run out
3124 // of things to do) or totally (at the very end). Notice that,
3125 // because we move entries from the global stack in chunks or
3126 // because another task might be doing the same, we might in fact
3127 // drop below the target. But, this is not a problem.
3128 size_t target_size;
3129 if (partially) {
3130 target_size = _cm->partial_mark_stack_size_target();
3131 } else {
3132 target_size = 0;
3133 }
3134
3135 if (_cm->mark_stack_size() > target_size) {
3136 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3137 get_entries_from_global_stack();
3138 drain_local_queue(partially);
3139 }
3140 }
3141 }
3142
3143 // SATB Queue has several assumptions on whether to call the par or
3144 // non-par versions of the methods. This is why some of the code is
3145 // replicated. We should really get rid of the single-threaded version
3146 // of the code to simplify things.
3147 void CMTask::drain_satb_buffers() {
3148 if (has_aborted()) return;
3149
3150 // We set this so that the regular clock knows that we're in the
3151 // middle of draining buffers and doesn't set the abort flag when it
3152 // notices that SATB buffers are available for draining. It'd be
3153 // very counterproductive if it did that. :-)
3154 _draining_satb_buffers = true;
3155
3156 CMSATBBufferClosure satb_cl(this, _g1h);
3157 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3158
3159 // This keeps claiming and applying the closure to completed buffers
3160 // until we run out of buffers or we need to abort.
3161 while (!has_aborted() &&
3162 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3163 regular_clock_call();
3164 }
3165
3166 _draining_satb_buffers = false;
3167
3168 assert(has_aborted() ||
3169 concurrent() ||
3170 satb_mq_set.completed_buffers_num() == 0, "invariant");
3171
3172 // again, this was a potentially expensive operation, decrease the
3173 // limits to get the regular clock call early
3174 decrease_limits();
3175 }
3176
3177 void CMTask::print_stats() {
3178 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3179 _worker_id, _calls);
3180 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3181 _elapsed_time_ms, _termination_time_ms);
3182 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3183 _step_times_ms.num(), _step_times_ms.avg(),
3184 _step_times_ms.sd());
3185 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3186 _step_times_ms.maximum(), _step_times_ms.sum());
3187 }
3188
3189 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3190 return _task_queues->steal(worker_id, hash_seed, obj);
3191 }
3192
3193 /*****************************************************************************
3194
3195 The do_marking_step(time_target_ms, ...) method is the building
3196 block of the parallel marking framework. It can be called in parallel
3197 with other invocations of do_marking_step() on different tasks
3198 (but only one per task, obviously) and concurrently with the
3199 mutator threads, or during remark, hence it eliminates the need
3200 for two versions of the code. When called during remark, it will
3201 pick up from where the task left off during the concurrent marking
3202 phase. Interestingly, tasks are also claimable during evacuation
3203 pauses, since do_marking_step() ensures that it aborts before
3204 it needs to yield.
3205
3206 The data structures that it uses to do marking work are the
3207 following:
3208
3209 (1) Marking Bitmap. If there are gray objects that appear only
3210 on the bitmap (this happens either when dealing with an overflow
3211 or when the initial marking phase has simply marked the roots
3212 and didn't push them on the stack), then tasks claim heap
3213 regions whose bitmap they then scan to find gray objects. A
3214 global finger indicates where the end of the last claimed region
3215 is. A local finger indicates how far into the region a task has
3216 scanned. The two fingers are used to determine how to gray an
3217 object (i.e. whether simply marking it is OK, as it will be
3218 visited by a task in the future, or whether it needs to be also
3219 pushed on a stack).
3220
3221 (2) Local Queue. The local queue of the task which is accessed
3222 reasonably efficiently by the task. Other tasks can steal from
3223 it when they run out of work. Throughout the marking phase, a
3224 task attempts to keep its local queue short but not totally
3225 empty, so that entries are available for stealing by other
3226 tasks. Only when there is no more work does a task totally
3227 drain its local queue.
3228
3229 (3) Global Mark Stack. This handles local queue overflow. During
3230 marking only sets of entries are moved between it and the local
3231 queues, as access to it requires a mutex and more fine-grain
3232 interaction with it which might cause contention. If it
3233 overflows, then the marking phase should restart and iterate
3234 over the bitmap to identify gray objects.
Throughout the marking 3235 phase, tasks attempt to keep the global mark stack at a small 3236 length but not totally empty, so that entries are available for 3237 popping by other tasks. Only when there is no more work, tasks 3238 will totally drain the global mark stack. 3239 3240 (4) SATB Buffer Queue. This is where completed SATB buffers are 3241 made available. Buffers are regularly removed from this queue 3242 and scanned for roots, so that the queue doesn't get too 3243 long. During remark, all completed buffers are processed, as 3244 well as the filled in parts of any uncompleted buffers. 3245 3246 The do_marking_step() method tries to abort when the time target 3247 has been reached. There are a few other cases when the 3248 do_marking_step() method also aborts: 3249 3250 (1) When the marking phase has been aborted (after a Full GC). 3251 3252 (2) When a global overflow (on the global stack) has been 3253 triggered. Before the task aborts, it will actually sync up with 3254 the other tasks to ensure that all the marking data structures 3255 (local queues, stacks, fingers etc.) are re-initialized so that 3256 when do_marking_step() completes, the marking phase can 3257 immediately restart. 3258 3259 (3) When enough completed SATB buffers are available. The 3260 do_marking_step() method only tries to drain SATB buffers right 3261 at the beginning. So, if enough buffers are available, the 3262 marking step aborts and the SATB buffers are processed at 3263 the beginning of the next invocation. 3264 3265 (4) To yield. when we have to yield then we abort and yield 3266 right at the end of do_marking_step(). This saves us from a lot 3267 of hassle as, by yielding we might allow a Full GC. If this 3268 happens then objects will be compacted underneath our feet, the 3269 heap might shrink, etc. We save checking for this by just 3270 aborting and doing the yield right at the end. 3271 3272 From the above it follows that the do_marking_step() method should 3273 be called in a loop (or, otherwise, regularly) until it completes. 3274 3275 If a marking step completes without its has_aborted() flag being 3276 true, it means it has completed the current marking phase (and 3277 also all other marking tasks have done so and have all synced up). 3278 3279 A method called regular_clock_call() is invoked "regularly" (in 3280 sub ms intervals) throughout marking. It is this clock method that 3281 checks all the abort conditions which were mentioned above and 3282 decides when the task should abort. A work-based scheme is used to 3283 trigger this clock method: when the number of object words the 3284 marking phase has scanned or the number of references the marking 3285 phase has visited reach a given limit. Additional invocations to 3286 the method clock have been planted in a few other strategic places 3287 too. The initial reason for the clock method was to avoid calling 3288 vtime too regularly, as it is quite expensive. So, once it was in 3289 place, it was natural to piggy-back all the other conditions on it 3290 too and not constantly check them throughout the code. 3291 3292 If do_termination is true then do_marking_step will enter its 3293 termination protocol. 3294 3295 The value of is_serial must be true when do_marking_step is being 3296 called serially (i.e. by the VMThread) and do_marking_step should 3297 skip any synchronization in the termination and overflow code. 3298 Examples include the serial remark code and the serial reference 3299 processing closures. 
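    As an illustration, a worker thread in a work gang is expected to
    drive its task with a retry loop of roughly the following shape (a
    sketch only, not the exact code of the concurrent marking or remark
    tasks; G1ConcMarkStepDurationMillis is the flag that provides the
    concurrent time slice):

      CMTask* task = _cm->task(worker_id);
      do {
        task->do_marking_step(G1ConcMarkStepDurationMillis,
                              true,   // do_termination
                              false); // is_serial
        // If the step aborted (time target reached, overflow, yield
        // request, etc.) and marking as a whole was not aborted by a
        // Full GC, simply call it again; it picks up where it left off.
      } while (task->has_aborted() && !_cm->has_aborted());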
3300
3301     The value of is_serial must be false when do_marking_step is
3302     being called by any of the worker threads in a work gang.
3303     Examples include the concurrent marking code (CMMarkingTask),
3304     the MT remark code, and the MT reference processing closures.
3305
3306  *****************************************************************************/
3307
3308 void CMTask::do_marking_step(double time_target_ms,
3309                              bool do_termination,
3310                              bool is_serial) {
3311   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3312   assert(concurrent() == _cm->concurrent(), "they should be the same");
3313
3314   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3315   assert(_task_queues != NULL, "invariant");
3316   assert(_task_queue != NULL, "invariant");
3317   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3318
3319   assert(!_claimed,
3320          "only one thread should claim this task at any one time");
3321
3322   // OK, this doesn't safeguard against all possible scenarios, as it
3323   // is possible for two threads to set the _claimed flag at the same
3324   // time. But it is only for debugging purposes anyway and it will
3325   // catch most problems.
3326   _claimed = true;
3327
3328   _start_time_ms = os::elapsedVTime() * 1000.0;
3329
3330   // If do_stealing is true then do_marking_step will attempt to
3331   // steal work from the other CMTasks. It only makes sense to
3332   // enable stealing when the termination protocol is enabled
3333   // and do_marking_step() is not being called serially.
3334   bool do_stealing = do_termination && !is_serial;
3335
3336   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3337   _time_target_ms = time_target_ms - diff_prediction_ms;
3338
3339   // Set up the variables that are used in the work-based scheme to
3340   // call the regular clock method.
3341   _words_scanned = 0;
3342   _refs_reached  = 0;
3343   recalculate_limits();
3344
3345   // Clear all flags.
3346   clear_has_aborted();
3347   _has_timed_out = false;
3348   _draining_satb_buffers = false;
3349
3350   ++_calls;
3351
3352   // Set up the bitmap and oop closures. Anything that uses them is
3353   // eventually called from this method, so it is OK to allocate these
3354   // statically.
3355   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3356   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3357   set_cm_oop_closure(&cm_oop_closure);
3358
3359   if (_cm->has_overflown()) {
3360     // This can happen if the mark stack overflows during a GC pause
3361     // and this task, after a yield point, restarts. We have to abort
3362     // as we need to get into the overflow protocol which happens
3363     // right at the end of this task.
3364     set_has_aborted();
3365   }
3366
3367   // First drain any available SATB buffers. After this, we will not
3368   // look at SATB buffers before the next invocation of this method.
3369   // If enough completed SATB buffers are queued up, the regular clock
3370   // will abort this task so that it restarts.
3371   drain_satb_buffers();
3372   // ...then partially drain the local queue and the global stack.
3373   drain_local_queue(true);
3374   drain_global_stack(true);
3375
3376   do {
3377     if (!has_aborted() && _curr_region != NULL) {
3378       // This means that we're already holding on to a region.
3379 assert(_finger != NULL, "if region is not NULL, then the finger " 3380 "should not be NULL either"); 3381 3382 // We might have restarted this task after an evacuation pause 3383 // which might have evacuated the region we're holding on to 3384 // underneath our feet. Let's read its limit again to make sure 3385 // that we do not iterate over a region of the heap that 3386 // contains garbage (update_region_limit() will also move 3387 // _finger to the start of the region if it is found empty). 3388 update_region_limit(); 3389 // We will start from _finger not from the start of the region, 3390 // as we might be restarting this task after aborting half-way 3391 // through scanning this region. In this case, _finger points to 3392 // the address where we last found a marked object. If this is a 3393 // fresh region, _finger points to start(). 3394 MemRegion mr = MemRegion(_finger, _region_limit); 3395 3396 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 3397 "humongous regions should go around loop once only"); 3398 3399 // Some special cases: 3400 // If the memory region is empty, we can just give up the region. 3401 // If the current region is humongous then we only need to check 3402 // the bitmap for the bit associated with the start of the object, 3403 // scan the object if it's live, and give up the region. 3404 // Otherwise, let's iterate over the bitmap of the part of the region 3405 // that is left. 3406 // If the iteration is successful, give up the region. 3407 if (mr.is_empty()) { 3408 giveup_current_region(); 3409 regular_clock_call(); 3410 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 3411 if (_nextMarkBitMap->isMarked(mr.start())) { 3412 // The object is marked - apply the closure 3413 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 3414 bitmap_closure.do_bit(offset); 3415 } 3416 // Even if this task aborted while scanning the humongous object 3417 // we can (and should) give up the current region. 3418 giveup_current_region(); 3419 regular_clock_call(); 3420 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 3421 giveup_current_region(); 3422 regular_clock_call(); 3423 } else { 3424 assert(has_aborted(), "currently the only way to do so"); 3425 // The only way to abort the bitmap iteration is to return 3426 // false from the do_bit() method. However, inside the 3427 // do_bit() method we move the _finger to point to the 3428 // object currently being looked at. So, if we bail out, we 3429 // have definitely set _finger to something non-null. 3430 assert(_finger != NULL, "invariant"); 3431 3432 // Region iteration was actually aborted. So now _finger 3433 // points to the address of the object we last scanned. If we 3434 // leave it there, when we restart this task, we will rescan 3435 // the object. It is easy to avoid this. We move the finger by 3436 // enough to point to the next possible object header (the 3437 // bitmap knows by how much we need to move it as it knows its 3438 // granularity). 3439 assert(_finger < _region_limit, "invariant"); 3440 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 3441 // Check if bitmap iteration was aborted while scanning the last object 3442 if (new_finger >= _region_limit) { 3443 giveup_current_region(); 3444 } else { 3445 move_finger_to(new_finger); 3446 } 3447 } 3448 } 3449 // At this point we have either completed iterating over the 3450 // region we were holding on to, or we have aborted. 
3451
3452     // We then partially drain the local queue and the global stack.
3453     // (Do we really need this?)
3454     drain_local_queue(true);
3455     drain_global_stack(true);
3456
3457     // Read the note on the claim_region() method on why it might
3458     // return NULL with potentially more regions available for
3459     // claiming and why we have to check out_of_regions() to determine
3460     // whether we're done or not.
3461     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3462       // We are going to try to claim a new region. We should have
3463       // given up on the previous one.
3464       // Separated the asserts so that we know which one fires.
3465       assert(_curr_region  == NULL, "invariant");
3466       assert(_finger       == NULL, "invariant");
3467       assert(_region_limit == NULL, "invariant");
3468       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3469       if (claimed_region != NULL) {
3470         // Yes, we managed to claim one.
3471         setup_for_region(claimed_region);
3472         assert(_curr_region == claimed_region, "invariant");
3473       }
3474       // It is important to call the regular clock here. It might take
3475       // a while to claim a region if, for example, we hit a large
3476       // block of empty regions. So we need to call the regular clock
3477       // method once round the loop to make sure it's called
3478       // frequently enough.
3479       regular_clock_call();
3480     }
3481
3482     if (!has_aborted() && _curr_region == NULL) {
3483       assert(_cm->out_of_regions(),
3484              "at this point we should be out of regions");
3485     }
3486   } while (_curr_region != NULL && !has_aborted());
3487
3488   if (!has_aborted()) {
3489     // We cannot check whether the global stack is empty, since other
3490     // tasks might be pushing objects to it concurrently.
3491     assert(_cm->out_of_regions(),
3492            "at this point we should be out of regions");
3493     // Try to reduce the number of available SATB buffers so that
3494     // remark has less work to do.
3495     drain_satb_buffers();
3496   }
3497
3498   // Since we've done everything else, we can now totally drain the
3499   // local queue and global stack.
3500   drain_local_queue(false);
3501   drain_global_stack(false);
3502
3503   // Attempt at work stealing from other tasks' queues.
3504   if (do_stealing && !has_aborted()) {
3505     // We have not aborted. This means that we have finished all that
3506     // we could. Let's try to do some stealing...
3507
3508     // We cannot check whether the global stack is empty, since other
3509     // tasks might be pushing objects to it concurrently.
3510     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3511            "only way to reach here");
3512     while (!has_aborted()) {
3513       oop obj;
3514       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3515         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3516                "any stolen object should be marked");
3517         scan_object(obj);
3518
3519         // And since we're towards the end, let's totally drain the
3520         // local queue and global stack.
3521         drain_local_queue(false);
3522         drain_global_stack(false);
3523       } else {
3524         break;
3525       }
3526     }
3527   }
3528
3529   // If we are about to wrap up and go into termination, check if we
3530   // should raise the overflow flag.
3531   if (do_termination && !has_aborted()) {
3532     if (_cm->force_overflow()->should_force()) {
3533       _cm->set_has_overflown();
3534       regular_clock_call();
3535     }
3536   }
3537
3538   // We still haven't aborted. Now, let's try to get into the
3539   // termination protocol.
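  // (This is the standard work-stealing termination protocol: a task
  // that offers termination waits until either all tasks have offered
  // it, in which case marking is done, or its should_exit_termination()
  // hook tells it to go back and look for more work.)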
3540   if (do_termination && !has_aborted()) {
3541     // We cannot check whether the global stack is empty, since other
3542     // tasks might be concurrently pushing objects on it.
3543     // Separated the asserts so that we know which one fires.
3544     assert(_cm->out_of_regions(), "only way to reach here");
3545     assert(_task_queue->size() == 0, "only way to reach here");
3546     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3547
3548     // The CMTask class also extends the TerminatorTerminator class,
3549     // hence its should_exit_termination() method will also decide
3550     // whether to exit the termination protocol.
3551     bool finished = (is_serial ||
3552                      _cm->terminator()->offer_termination(this));
3553     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3554     _termination_time_ms +=
3555       termination_end_time_ms - _termination_start_time_ms;
3556
3557     if (finished) {
3558       // We're all done.
3559
3560       if (_worker_id == 0) {
3561         // Let's allow task 0 to do this.
3562         if (concurrent()) {
3563           assert(_cm->concurrent_marking_in_progress(), "invariant");
3564           // We need to set this to false before the next
3565           // safepoint. This way we ensure that the marking phase
3566           // doesn't observe any more heap expansions.
3567           _cm->clear_concurrent_marking_in_progress();
3568         }
3569       }
3570
3571       // We can now guarantee that the global stack is empty, since
3572       // all other tasks have finished. We separated the guarantees so
3573       // that, if a condition is false, we can immediately find out
3574       // which one.
3575       guarantee(_cm->out_of_regions(), "only way to reach here");
3576       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3577       guarantee(_task_queue->size() == 0, "only way to reach here");
3578       guarantee(!_cm->has_overflown(), "only way to reach here");
3579       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3580     } else {
3581       // Apparently there's more work to do. Let's abort this task;
3582       // the caller will restart it and we can hopefully find more
3583       // things to do.
3584       set_has_aborted();
3585     }
3586   }
3587
3588   // Mainly for debugging purposes to make sure that a pointer to the
3589   // closure which was statically allocated in this frame doesn't
3590   // escape it by accident.
3591   set_cm_oop_closure(NULL);
3592   double end_time_ms = os::elapsedVTime() * 1000.0;
3593   double elapsed_time_ms = end_time_ms - _start_time_ms;
3594   // Update the step history.
3595   _step_times_ms.add(elapsed_time_ms);
3596
3597   if (has_aborted()) {
3598     // The task was aborted for some reason.
3599     if (_has_timed_out) {
3600       double diff_ms = elapsed_time_ms - _time_target_ms;
3601       // Keep statistics of how well we did with respect to hitting
3602       // our target only if we actually timed out (if we aborted for
3603       // other reasons, then the results might get skewed).
3604       _marking_step_diffs_ms.add(diff_ms);
3605     }
3606
3607     if (_cm->has_overflown()) {
3608       // This is the interesting one. We aborted because a global
3609       // overflow was raised. This means we have to restart the
3610       // marking phase and start iterating over regions. However, in
3611       // order to do this we have to make sure that all tasks stop
3612       // what they are doing and re-initialize in a safe manner. We
3613       // will achieve this with the use of two barrier sync points.
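      // In outline:
      //   barrier 1 -- wait until every task has stopped doing
      //                marking work;
      //   reset     -- each task clears its local region fields
      //                (clear_region_fields() below);
      //   barrier 2 -- wait until every task has finished resetting,
      //                after which marking can restart cleanly.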
3614       if (!is_serial) {
3615         // We only need to enter the sync barrier if being called
3616         // from a parallel context.
3617         _cm->enter_first_sync_barrier(_worker_id);
3618
3619         // When we exit this sync barrier we know that all tasks have
3620         // stopped doing marking work. So, it's now safe to
3621         // re-initialize our data structures. At the end of this method,
3622         // task 0 will clear the global data structures.
3623       }
3624
3625       // We clear the local state of this task...
3626       clear_region_fields();
3627
3628       if (!is_serial) {
3629         // ...and enter the second barrier.
3630         _cm->enter_second_sync_barrier(_worker_id);
3631       }
3632       // At this point, if we're in the concurrent phase of
3633       // marking, everything has been re-initialized and we're
3634       // ready to restart.
3635     }
3636   }
3637
3638   _claimed = false;
3639 }
3640
3641 CMTask::CMTask(uint worker_id,
3642                ConcurrentMark* cm,
3643                size_t* marked_bytes,
3644                BitMap* card_bm,
3645                CMTaskQueue* task_queue,
3646                CMTaskQueueSet* task_queues)
3647   : _g1h(G1CollectedHeap::heap()),
3648     _worker_id(worker_id), _cm(cm),
3649     _claimed(false),
3650     _nextMarkBitMap(NULL), _hash_seed(17),
3651     _task_queue(task_queue),
3652     _task_queues(task_queues),
3653     _cm_oop_closure(NULL),
3654     _marked_bytes_array(marked_bytes),
3655     _card_bm(card_bm) {
3656   guarantee(task_queue != NULL, "invariant");
3657   guarantee(task_queues != NULL, "invariant");
3658
3659   _marking_step_diffs_ms.add(0.5);
3660 }
3661
3662 // These are formatting macros that are used below to ensure
3663 // consistent formatting. The *_H_* versions are used to format the
3664 // header for a particular value, and they should be kept consistent
3665 // with the corresponding macro. Also note that most of the macros add
3666 // the necessary whitespace (as a prefix), which makes them a bit
3667 // easier to compose.
3668
3669 // All the output lines are prefixed with this string to be able to
3670 // identify them easily in a large log file.
3671 #define G1PPRL_LINE_PREFIX             "###"
3672
3673 #define G1PPRL_ADDR_BASE_FORMAT        " " PTR_FORMAT "-" PTR_FORMAT
3674 #ifdef _LP64
3675 #define G1PPRL_ADDR_BASE_H_FORMAT      " %37s"
3676 #else // _LP64
3677 #define G1PPRL_ADDR_BASE_H_FORMAT      " %21s"
3678 #endif // _LP64
3679
3680 // For per-region info
3681 #define G1PPRL_TYPE_FORMAT             " %-4s"
3682 #define G1PPRL_TYPE_H_FORMAT           " %4s"
3683 #define G1PPRL_BYTE_FORMAT             " " SIZE_FORMAT_W(9)
3684 #define G1PPRL_BYTE_H_FORMAT           " %9s"
3685 #define G1PPRL_DOUBLE_FORMAT           " %14.1f"
3686 #define G1PPRL_DOUBLE_H_FORMAT         " %14s"
3687
3688 // For summary info
3689 #define G1PPRL_SUM_ADDR_FORMAT(tag)    " " tag ":" G1PPRL_ADDR_BASE_FORMAT
3690 #define G1PPRL_SUM_BYTE_FORMAT(tag)    " " tag ": " SIZE_FORMAT
3691 #define G1PPRL_SUM_MB_FORMAT(tag)      " " tag ": %1.2f MB"
3692 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3693
3694 G1PrintRegionLivenessInfoClosure::
3695 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
3696   : _out(out),
3697     _total_used_bytes(0), _total_capacity_bytes(0),
3698     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3699     _hum_used_bytes(0), _hum_capacity_bytes(0),
3700     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3701     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3702   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3703   MemRegion g1_reserved = g1h->g1_reserved();
3704   double now = os::elapsedTime();
3705
3706   // Print the header of the output.
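  // The output looks roughly like this (a sketch; the exact column
  // widths come from the G1PPRL_* format macros above and differ
  // between 32-bit and 64-bit builds):
  //
  //   ### PHASE Post-Marking @ 1.234
  //   ### HEAP reserved: 0x...-0x... region-size: 1048576
  //   ###
  //   ### type  address-range  used  prev-live  next-live ...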
3707   _out->cr();
3708   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3709   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
3710                  G1PPRL_SUM_ADDR_FORMAT("reserved")
3711                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
3712                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3713                  HeapRegion::GrainBytes);
3714   _out->print_cr(G1PPRL_LINE_PREFIX);
3715   _out->print_cr(G1PPRL_LINE_PREFIX
3716                  G1PPRL_TYPE_H_FORMAT
3717                  G1PPRL_ADDR_BASE_H_FORMAT
3718                  G1PPRL_BYTE_H_FORMAT
3719                  G1PPRL_BYTE_H_FORMAT
3720                  G1PPRL_BYTE_H_FORMAT
3721                  G1PPRL_DOUBLE_H_FORMAT
3722                  G1PPRL_BYTE_H_FORMAT
3723                  G1PPRL_BYTE_H_FORMAT,
3724                  "type", "address-range",
3725                  "used", "prev-live", "next-live", "gc-eff",
3726                  "remset", "code-roots");
3727   _out->print_cr(G1PPRL_LINE_PREFIX
3728                  G1PPRL_TYPE_H_FORMAT
3729                  G1PPRL_ADDR_BASE_H_FORMAT
3730                  G1PPRL_BYTE_H_FORMAT
3731                  G1PPRL_BYTE_H_FORMAT
3732                  G1PPRL_BYTE_H_FORMAT
3733                  G1PPRL_DOUBLE_H_FORMAT
3734                  G1PPRL_BYTE_H_FORMAT
3735                  G1PPRL_BYTE_H_FORMAT,
3736                  "", "",
3737                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3738                  "(bytes)", "(bytes)");
3739 }
3740
3741 // This takes a pointer to one of the _hum_* fields; it deduces the
3742 // corresponding value for a region in a humongous region series
3743 // (either the region size, or what's left if the _hum_* field is
3744 // < the region size) and updates the _hum_* field accordingly.
3745 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3746   size_t bytes = 0;
3747   // The > 0 check is to deal with the prev and next live bytes which
3748   // could be 0.
3749   if (*hum_bytes > 0) {
3750     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3751     *hum_bytes -= bytes;
3752   }
3753   return bytes;
3754 }
3755
3756 // It deduces the values for a region in a humongous region series
3757 // from the _hum_* fields and updates those accordingly. It assumes
3758 // that the _hum_* fields have already been set up from the "starts
3759 // humongous" region and we visit the regions in address order.
3760 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3761                                                      size_t* capacity_bytes,
3762                                                      size_t* prev_live_bytes,
3763                                                      size_t* next_live_bytes) {
3764   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3765   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3766   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3767   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3768   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3769 }
3770
3771 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3772   const char* type       = r->get_type_str();
3773   HeapWord* bottom       = r->bottom();
3774   HeapWord* end          = r->end();
3775   size_t capacity_bytes  = r->capacity();
3776   size_t used_bytes      = r->used();
3777   size_t prev_live_bytes = r->live_bytes();
3778   size_t next_live_bytes = r->next_live_bytes();
3779   double gc_eff          = r->gc_efficiency();
3780   size_t remset_bytes    = r->rem_set()->mem_size();
3781   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3782
3783   if (r->is_starts_humongous()) {
3784     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3785            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3786            "they should have been zeroed after the last time we used them");
3787     // Set up the _hum_* fields.
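    // Record the totals for the whole humongous series here; the
    // "continues humongous" regions that follow will each draw a
    // one-region share back out of these fields via get_hum_bytes().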
3788     _hum_capacity_bytes  = capacity_bytes;
3789     _hum_used_bytes      = used_bytes;
3790     _hum_prev_live_bytes = prev_live_bytes;
3791     _hum_next_live_bytes = next_live_bytes;
3792     get_hum_bytes(&used_bytes, &capacity_bytes,
3793                   &prev_live_bytes, &next_live_bytes);
3794     end = bottom + HeapRegion::GrainWords;
3795   } else if (r->is_continues_humongous()) {
3796     get_hum_bytes(&used_bytes, &capacity_bytes,
3797                   &prev_live_bytes, &next_live_bytes);
3798     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3799   }
3800
3801   _total_used_bytes      += used_bytes;
3802   _total_capacity_bytes  += capacity_bytes;
3803   _total_prev_live_bytes += prev_live_bytes;
3804   _total_next_live_bytes += next_live_bytes;
3805   _total_remset_bytes    += remset_bytes;
3806   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3807
3808   // Print a line for this particular region.
3809   _out->print_cr(G1PPRL_LINE_PREFIX
3810                  G1PPRL_TYPE_FORMAT
3811                  G1PPRL_ADDR_BASE_FORMAT
3812                  G1PPRL_BYTE_FORMAT
3813                  G1PPRL_BYTE_FORMAT
3814                  G1PPRL_BYTE_FORMAT
3815                  G1PPRL_DOUBLE_FORMAT
3816                  G1PPRL_BYTE_FORMAT
3817                  G1PPRL_BYTE_FORMAT,
3818                  type, p2i(bottom), p2i(end),
3819                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3820                  remset_bytes, strong_code_roots_bytes);
3821
3822   return false;
3823 }
3824
3825 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3826   // Add the static memory usage to the remembered set sizes.
3827   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3828   // Print the footer of the output.
3829   _out->print_cr(G1PPRL_LINE_PREFIX);
3830   _out->print_cr(G1PPRL_LINE_PREFIX
3831                  " SUMMARY"
3832                  G1PPRL_SUM_MB_FORMAT("capacity")
3833                  G1PPRL_SUM_MB_PERC_FORMAT("used")
3834                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3835                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3836                  G1PPRL_SUM_MB_FORMAT("remset")
3837                  G1PPRL_SUM_MB_FORMAT("code-roots"),
3838                  bytes_to_mb(_total_capacity_bytes),
3839                  bytes_to_mb(_total_used_bytes),
3840                  perc(_total_used_bytes, _total_capacity_bytes),
3841                  bytes_to_mb(_total_prev_live_bytes),
3842                  perc(_total_prev_live_bytes, _total_capacity_bytes),
3843                  bytes_to_mb(_total_next_live_bytes),
3844                  perc(_total_next_live_bytes, _total_capacity_bytes),
3845                  bytes_to_mb(_total_remset_bytes),
3846                  bytes_to_mb(_total_strong_code_roots_bytes));
3847   _out->cr();
3848 }
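// A typical way to drive the closure above (a sketch; in this file it
// is expected to be used from the cleanup logging path):
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the header, doHeapRegion() prints one line
// per region, and the destructor prints the summary footer.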