/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ErgoVerbose.hpp"
#include "gc/g1/g1Log.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
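  // Each mark bit covers (1 << _shifter) heap words, so only addresses
  // aligned to (HeapWordSize << _shifter) bytes correspond exactly to a bit;
  // rounding up keeps us from attributing a neighboring mark bit to addr.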
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
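  // Each region is cleared one chunk at a time (1M of heap per step, see
  // chunk_size_in_words below) so that a yield check can run between chunks
  // rather than only once per region.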
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer     _hrclaimer;
  bool                  _suspendible; // If the task is suspendible, workers must join the STS.

 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflowed the marking stack while marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity * 2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          (size_t) _capacity / K, (size_t) new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
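  // There is room: claim slots [start, next_index) while still holding
  // ParGCRareEvent_lock, then copy the array into the claimed slots.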
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
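  // If the scan was aborted, claim_next() stops handing out survivors before
  // the list is exhausted, so _next_survivor need not be NULL here.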
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
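  // The underlying OS thread runs immediately, but it idles until an
  // initial-mark pause signals the start of a marking cycle.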
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
      (double) os::processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor             = sleep_factor;
    _marking_task_overhead    = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
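    // scale_parallel_threads() keeps roughly one marking thread per four
    // parallel GC threads; for example, ParallelGCThreads = 8 yields
    // MAX2((8 + 2) / 4, 1U) = 2 concurrent marking threads.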
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
  gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
  gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
  gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

  _parallel_workers = new WorkGang("G1 Marker",
       _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
882 * 883 * Note, however, that this code is also used during remark and in 884 * this case we should not attempt to leave / enter the STS, otherwise 885 * we'll either hit an assert (debug / fastdebug) or deadlock 886 * (product). So we should only leave / enter the STS if we are 887 * operating concurrently. 888 * 889 * Because the thread that does the sync barrier has left the STS, it 890 * is possible to be suspended for a Full GC or an evacuation pause 891 * could occur. This is actually safe, since the entering the sync 892 * barrier is one of the last things do_marking_step() does, and it 893 * doesn't manipulate any data structures afterwards. 894 */ 895 896 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 897 bool barrier_aborted; 898 { 899 SuspendibleThreadSetLeaver sts_leave(concurrent()); 900 barrier_aborted = !_first_overflow_barrier_sync.enter(); 901 } 902 903 // at this point everyone should have synced up and not be doing any 904 // more work 905 906 if (barrier_aborted) { 907 // If the barrier aborted we ignore the overflow condition and 908 // just abort the whole marking phase as quickly as possible. 909 return; 910 } 911 912 // If we're executing the concurrent phase of marking, reset the marking 913 // state; otherwise the marking state is reset after reference processing, 914 // during the remark pause. 915 // If we reset here as a result of an overflow during the remark we will 916 // see assertion failures from any subsequent set_concurrency_and_phase() 917 // calls. 918 if (concurrent()) { 919 // let the task associated with with worker 0 do this 920 if (worker_id == 0) { 921 // task 0 is responsible for clearing the global data structures 922 // We should be here because of an overflow. During STW we should 923 // not clear the overflow flag since we rely on it being true when 924 // we exit this method to abort the pause and restart concurrent 925 // marking. 
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp();
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
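// With UseDynamicNumberOfGCThreads the worker count adapts, via
// AdaptiveSizePolicy::calc_default_active_workers(), to the current number
// of non-daemon Java threads; otherwise the precomputed maximum is used.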
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  double scan_start = os::elapsedTime();

  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
    }

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// Helper class to get rid of some boilerplate code.
class G1CMTraceTime : public StackObj {
  GCTraceTimeImpl _gc_trace_time;
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

 public:
  G1CMTraceTime(const char* title, bool doit)
    : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
  }
};

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      G1CMTraceTime trace("GC aggregate-data", G1Log::finer());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  void set_bit_for_region(HeapRegion* hr) {
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    _region_bm->par_at_put(index, true);
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // This will happen if we are handling a humongous object that spans
      // several heap regions.
      if (obj_end > hr->end()) {
        break;
      }
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.
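// Each discrepancy found (marked bytes, the region bit, or a card bit)
// bumps a per-region failure count, which the owning verification task
// then sums across all workers.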

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    if (exp_marked_bytes > act_marked_bytes) {
      if (hr->is_starts_humongous()) {
        // For start_humongous regions, the size of the whole object will be
        // in exp_marked_bytes.
        HeapRegion* region = hr;
        int num_regions;
        for (num_regions = 0; region != NULL; num_regions++) {
          region = _g1h->next_region_in_humongous(region);
        }
        if ((num_regions - 1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
          failures += 1;
        } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
          failures += 1;
        }
      } else {
        // We're not OK if expected marked bytes > actual marked bytes. It means
        // we have missed accounting some objects during the actual marking.
        failures += 1;
      }
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        failures += 1;
      }
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;

  HeapRegionClaimer _hrclaimer;

 public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
 public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             end_idx, _card_bm->size());
      assert(start_idx < _card_bm->size(),
             "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             start_idx, _card_bm->size());

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;
  HeapRegionClaimer _hrclaimer;

 public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
  }
};

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HeapRegionSetCount _old_regions_removed;
  HeapRegionSetCount _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

 public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(),
    _humongous_regions_removed(),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
  const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
1656 _g1->reset_gc_time_stamps(hr); 1657 hr->note_end_of_marking(); 1658 1659 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1660 _freed_bytes += hr->used(); 1661 hr->set_containing_set(NULL); 1662 if (hr->is_humongous()) { 1663 _humongous_regions_removed.increment(1u, hr->capacity()); 1664 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1665 } else { 1666 _old_regions_removed.increment(1u, hr->capacity()); 1667 _g1->free_region(hr, _local_cleanup_list, true); 1668 } 1669 } else { 1670 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1671 } 1672 1673 return false; 1674 } 1675 }; 1676 1677 class G1ParNoteEndTask: public AbstractGangTask { 1678 friend class G1NoteEndOfConcMarkClosure; 1679 1680 protected: 1681 G1CollectedHeap* _g1h; 1682 FreeRegionList* _cleanup_list; 1683 HeapRegionClaimer _hrclaimer; 1684 1685 public: 1686 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1687 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1688 } 1689 1690 void work(uint worker_id) { 1691 FreeRegionList local_cleanup_list("Local Cleanup List"); 1692 HRRSCleanupTask hrrs_cleanup_task; 1693 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1694 &hrrs_cleanup_task); 1695 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1696 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1697 1698 // Now update the lists 1699 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1700 { 1701 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1702 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1703 1704 // If we iterate over the global cleanup list at the end of 1705 // cleanup to do this printing we will not guarantee to only 1706 // generate output for the newly-reclaimed regions (the list 1707 // might not be empty at the beginning of cleanup; we might 1708 // still be working on its previous contents). So we do the 1709 // printing here, before we append the new regions to the global 1710 // cleanup list. 1711 1712 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1713 if (hr_printer->is_active()) { 1714 FreeRegionListIterator iter(&local_cleanup_list); 1715 while (iter.more_available()) { 1716 HeapRegion* hr = iter.get_next(); 1717 hr_printer->cleanup(hr); 1718 } 1719 } 1720 1721 _cleanup_list->add_ordered(&local_cleanup_list); 1722 assert(local_cleanup_list.is_empty(), "post-condition"); 1723 1724 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1725 } 1726 } 1727 }; 1728 1729 class G1ParScrubRemSetTask: public AbstractGangTask { 1730 protected: 1731 G1RemSet* _g1rs; 1732 BitMap* _region_bm; 1733 BitMap* _card_bm; 1734 HeapRegionClaimer _hrclaimer; 1735 1736 public: 1737 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1738 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1739 } 1740 1741 void work(uint worker_id) { 1742 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1743 } 1744 1745 }; 1746 1747 void ConcurrentMark::cleanup() { 1748 // world is stopped at this checkpoint 1749 assert(SafepointSynchronize::is_at_safepoint(), 1750 "world should be stopped"); 1751 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1752 1753 // If a full collection has happened, we shouldn't do this. 
1754 if (has_aborted()) {
1755 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1756 return;
1757 }
1758
1759 g1h->verify_region_sets_optional();
1760
1761 if (VerifyDuringGC) {
1762 HandleMark hm; // handle scope
1763 g1h->prepare_for_verify();
1764 Universe::verify(VerifyOption_G1UsePrevMarking,
1765 " VerifyDuringGC:(before)");
1766 }
1767 g1h->check_bitmaps("Cleanup Start");
1768
1769 G1CollectorPolicy* g1p = g1h->g1_policy();
1770 g1p->record_concurrent_mark_cleanup_start();
1771
1772 double start = os::elapsedTime();
1773
1774 HeapRegionRemSet::reset_for_cleanup_tasks();
1775
1776 // Do counting once more with the world stopped for good measure.
1777 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1778
1779 g1h->workers()->run_task(&g1_par_count_task);
1780
1781 if (VerifyDuringGC) {
1782 // Verify that the counting data accumulated during marking matches
1783 // that calculated by walking the marking bitmap.
1784
1785 // Bitmaps to hold expected values
1786 BitMap expected_region_bm(_region_bm.size(), true);
1787 BitMap expected_card_bm(_card_bm.size(), true);
1788
1789 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1790 &_region_bm,
1791 &_card_bm,
1792 &expected_region_bm,
1793 &expected_card_bm);
1794
1795 g1h->workers()->run_task(&g1_par_verify_task);
1796
1797 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1798 }
1799
1800 size_t start_used_bytes = g1h->used();
1801 g1h->collector_state()->set_mark_in_progress(false);
1802
1803 double count_end = os::elapsedTime();
1804 double this_final_counting_time = (count_end - start);
1805 _total_counting_time += this_final_counting_time;
1806
1807 if (G1PrintRegionLivenessInfo) {
1808 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1809 _g1h->heap_region_iterate(&cl);
1810 }
1811
1812 // Install newly created mark bitmap as "prev".
1813 swapMarkBitMaps();
1814
1815 g1h->reset_gc_time_stamp();
1816
1817 uint n_workers = _g1h->workers()->active_workers();
1818
1819 // Note end of marking in all heap regions.
1820 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1821 g1h->workers()->run_task(&g1_par_note_end_task);
1822 g1h->check_gc_time_stamps();
1823
1824 if (!cleanup_list_is_empty()) {
1825 // The cleanup list is not empty, so we'll have to process it
1826 // concurrently. Notify anyone else that might be wanting free
1827 // regions that there will be more free regions coming soon.
1828 g1h->set_free_regions_coming();
1829 }
1830
1831 // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1832 // call below, since scrubbing affects the metric by which we sort the heap regions.
1833 if (G1ScrubRemSets) {
1834 double rs_scrub_start = os::elapsedTime();
1835 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1836 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1837
1838 double rs_scrub_end = os::elapsedTime();
1839 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1840 _total_rs_scrub_time += this_rs_scrub_time;
1841 }
1842
1843 // this will also free any regions totally full of garbage objects,
1844 // and sort the regions.
1845 g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1846
1847 // Statistics.
1848 double end = os::elapsedTime();
1849 _cleanup_times.add((end - start) * 1000.0);
1850
1851 if (G1Log::fine()) {
1852 g1h->g1_policy()->print_heap_transition(start_used_bytes);
1853 }
1854
1855 // Clean up will have freed any regions completely full of garbage.
1856 // Update the soft reference policy with the new heap occupancy.
1857 Universe::update_heap_info_at_gc();
1858
1859 if (VerifyDuringGC) {
1860 HandleMark hm; // handle scope
1861 g1h->prepare_for_verify();
1862 Universe::verify(VerifyOption_G1UsePrevMarking,
1863 " VerifyDuringGC:(after)");
1864 }
1865
1866 g1h->check_bitmaps("Cleanup End");
1867
1868 g1h->verify_region_sets_optional();
1869
1870 // We need to make this be a "collection" so any collection pause that
1871 // races with it goes around and waits for completeCleanup to finish.
1872 g1h->increment_total_collections();
1873
1874 // Clean out dead classes and update Metaspace sizes.
1875 if (ClassUnloadingWithConcurrentMark) {
1876 ClassLoaderDataGraph::purge();
1877 }
1878 MetaspaceGC::compute_new_size();
1879
1880 // We reclaimed old regions so we should calculate the sizes to make
1881 // sure we update the old gen/space data.
1882 g1h->g1mm()->update_sizes();
1883 g1h->allocation_context_stats().update_after_mark();
1884
1885 g1h->trace_heap_after_concurrent_cycle();
1886 }
1887
1888 void ConcurrentMark::completeCleanup() {
1889 if (has_aborted()) return;
1890
1891 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1892
1893 _cleanup_list.verify_optional();
1894 FreeRegionList tmp_free_list("Tmp Free List");
1895
1896 if (G1ConcRegionFreeingVerbose) {
1897 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1898 "cleanup list has %u entries",
1899 _cleanup_list.length());
1900 }
1901
1902 // No one else should be accessing the _cleanup_list at this point,
1903 // so it is not necessary to take any locks
1904 while (!_cleanup_list.is_empty()) {
1905 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1906 assert(hr != NULL, "Got NULL from a non-empty list");
1907 hr->par_clear();
1908 tmp_free_list.add_ordered(hr);
1909
1910 // Instead of adding one region at a time to the secondary_free_list,
1911 // we accumulate them in the local list and move them a few at a
1912 // time. This also cuts down on the number of notify_all() calls
1913 // we do during this process. We'll also append the local list when
1914 // _cleanup_list is empty (which means we just removed the last
1915 // region from the _cleanup_list).
1916 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1917 _cleanup_list.is_empty()) {
1918 if (G1ConcRegionFreeingVerbose) {
1919 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1920 "appending %u entries to the secondary_free_list, "
1921 "cleanup list still has %u entries",
1922 tmp_free_list.length(),
1923 _cleanup_list.length());
1924 }
1925
1926 {
1927 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1928 g1h->secondary_free_list_add(&tmp_free_list);
1929 SecondaryFreeList_lock->notify_all();
1930 }
1931 #ifndef PRODUCT
1932 if (G1StressConcRegionFreeing) {
1933 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1934 os::sleep(Thread::current(), (jlong) 1, false);
1935 }
1936 }
1937 #endif
1938 }
1939 }
1940 assert(tmp_free_list.is_empty(), "post-condition");
1941 }
1942
1943 // Supporting Object and Oop closures for reference discovery
1944 // and processing during marking
1945
1946 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1947 HeapWord* addr = (HeapWord*)obj;
1948 return addr != NULL &&
1949 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1950 }
1951
1952 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1953 // Uses the CMTask associated with a worker thread (for serial reference
1954 // processing the CMTask for worker 0 is used) to preserve (mark) and
1955 // trace referent objects.
1956 //
1957 // Using the CMTask and embedded local queues avoids having the worker
1958 // threads operating on the global mark stack. This reduces the risk
1959 // of overflowing the stack - which we would rather avoid at this late
1960 // stage. Also using the tasks' local queues removes the potential
1961 // for the workers to interfere with each other, which could occur if
1962 // they operated on the global stack.
1963
1964 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1965 ConcurrentMark* _cm;
1966 CMTask* _task;
1967 int _ref_counter_limit;
1968 int _ref_counter;
1969 bool _is_serial;
1970 public:
1971 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1972 _cm(cm), _task(task), _is_serial(is_serial),
1973 _ref_counter_limit(G1RefProcDrainInterval) {
1974 assert(_ref_counter_limit > 0, "sanity");
1975 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1976 _ref_counter = _ref_counter_limit;
1977 }
1978
1979 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1980 virtual void do_oop( oop* p) { do_oop_work(p); }
1981
1982 template <class T> void do_oop_work(T* p) {
1983 if (!_cm->has_overflown()) {
1984 oop obj = oopDesc::load_decode_heap_oop(p);
1985 _task->deal_with_reference(obj);
1986 _ref_counter--;
1987
1988 if (_ref_counter == 0) {
1989 // We have dealt with _ref_counter_limit references, pushing them
1990 // and objects reachable from them on to the local stack (and
1991 // possibly the global stack). Call CMTask::do_marking_step() to
1992 // process these entries.
1993 //
1994 // We call CMTask::do_marking_step() in a loop, which we'll exit if
1995 // there's nothing more to do (i.e. we're done with the entries that
1996 // were pushed as a result of the CMTask::deal_with_reference() calls
1997 // above) or we overflow.
1998 //
1999 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2000 // flag while there may still be some work to do. (See the comment at
2001 // the beginning of CMTask::do_marking_step() for those conditions -
2002 // one of which is reaching the specified time target.) It is only
2003 // when CMTask::do_marking_step() returns without setting the
2004 // has_aborted() flag that the marking step has completed.
2005 do {
2006 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2007 _task->do_marking_step(mark_step_duration_ms,
2008 false /* do_termination */,
2009 _is_serial);
2010 } while (_task->has_aborted() && !_cm->has_overflown());
2011 _ref_counter = _ref_counter_limit;
2012 }
2013 }
2014 }
2015 };
2016
2017 // 'Drain' oop closure used by both serial and parallel reference processing.
2018 // Uses the CMTask associated with a given worker thread (for serial
2019 // reference processing the CMTask for worker 0 is used). Calls the
2020 // do_marking_step routine, with an unbelievably large timeout value,
2021 // to drain the marking data structures of the remaining entries
2022 // added by the 'keep alive' oop closure above.
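// A minimal usage sketch (illustrative only; it mirrors the serial setup
// in ConcurrentMark::weakRefsWork() further below, where the CMTask for
// worker 0 stands in for a gang worker and no parallel executor is used):
//
//   G1CMKeepAliveAndDrainClosure keep_alive(cm, cm->task(0), true /* is_serial */);
//   G1CMDrainMarkingStackClosure drain_mark_stack(cm, cm->task(0), true /* is_serial */);
//   rp->process_discovered_references(&is_alive, &keep_alive, &drain_mark_stack,
//                                     NULL /* executor */, gc_timer);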
2023
2024 class G1CMDrainMarkingStackClosure: public VoidClosure {
2025 ConcurrentMark* _cm;
2026 CMTask* _task;
2027 bool _is_serial;
2028 public:
2029 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2030 _cm(cm), _task(task), _is_serial(is_serial) {
2031 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2032 }
2033
2034 void do_void() {
2035 do {
2036 // We call CMTask::do_marking_step() to completely drain the local
2037 // and global marking stacks of entries pushed by the 'keep alive'
2038 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2039 //
2040 // CMTask::do_marking_step() is called in a loop, which we'll exit
2041 // if there's nothing more to do (i.e. we've completely drained the
2042 // entries that were pushed as a result of applying the 'keep alive'
2043 // closure to the entries on the discovered ref lists) or we overflow
2044 // the global marking stack.
2045 //
2046 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2047 // flag while there may still be some work to do. (See the comment at
2048 // the beginning of CMTask::do_marking_step() for those conditions -
2049 // one of which is reaching the specified time target.) It is only
2050 // when CMTask::do_marking_step() returns without setting the
2051 // has_aborted() flag that the marking step has completed.
2052
2053 _task->do_marking_step(1000000000.0 /* something very large */,
2054 true /* do_termination */,
2055 _is_serial);
2056 } while (_task->has_aborted() && !_cm->has_overflown());
2057 }
2058 };
2059
2060 // Implementation of AbstractRefProcTaskExecutor for parallel
2061 // reference processing at the end of G1 concurrent marking
2062
2063 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2064 private:
2065 G1CollectedHeap* _g1h;
2066 ConcurrentMark* _cm;
2067 WorkGang* _workers;
2068 uint _active_workers;
2069
2070 public:
2071 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2072 ConcurrentMark* cm,
2073 WorkGang* workers,
2074 uint n_workers) :
2075 _g1h(g1h), _cm(cm),
2076 _workers(workers), _active_workers(n_workers) { }
2077
2078 // Executes the given task using concurrent marking worker threads.
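// (Both overloads wrap the given task in one of the AbstractGangTask
// proxies defined below and hand it to the work gang.)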
2079 virtual void execute(ProcessTask& task);
2080 virtual void execute(EnqueueTask& task);
2081 };
2082
2083 class G1CMRefProcTaskProxy: public AbstractGangTask {
2084 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2085 ProcessTask& _proc_task;
2086 G1CollectedHeap* _g1h;
2087 ConcurrentMark* _cm;
2088
2089 public:
2090 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2091 G1CollectedHeap* g1h,
2092 ConcurrentMark* cm) :
2093 AbstractGangTask("Process reference objects in parallel"),
2094 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2095 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2096 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2097 }
2098
2099 virtual void work(uint worker_id) {
2100 ResourceMark rm;
2101 HandleMark hm;
2102 CMTask* task = _cm->task(worker_id);
2103 G1CMIsAliveClosure g1_is_alive(_g1h);
2104 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2105 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2106
2107 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2108 }
2109 };
2110
2111 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2112 assert(_workers != NULL, "Need parallel worker threads.");
2113 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2114
2115 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2116
2117 // We need to reset the concurrency level before each
2118 // proxy task execution, so that the termination protocol
2119 // and overflow handling in CMTask::do_marking_step() know
2120 // how many workers to wait for.
2121 _cm->set_concurrency(_active_workers);
2122 _workers->run_task(&proc_task_proxy);
2123 }
2124
2125 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2126 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2127 EnqueueTask& _enq_task;
2128
2129 public:
2130 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2131 AbstractGangTask("Enqueue reference objects in parallel"),
2132 _enq_task(enq_task) { }
2133
2134 virtual void work(uint worker_id) {
2135 _enq_task.work(worker_id);
2136 }
2137 };
2138
2139 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2140 assert(_workers != NULL, "Need parallel worker threads.");
2141 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2142
2143 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2144
2145 // Not strictly necessary but...
2146 //
2147 // We need to reset the concurrency level before each
2148 // proxy task execution, so that the termination protocol
2149 // and overflow handling in CMTask::do_marking_step() know
2150 // how many workers to wait for.
2151 _cm->set_concurrency(_active_workers);
2152 _workers->run_task(&enq_task_proxy);
2153 }
2154
2155 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2156 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2157 }
2158
2159 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2160 if (has_overflown()) {
2161 // Skip processing the discovered references if we have
2162 // overflown the global marking stack. Reference objects
2163 // only get discovered once so it is OK not to
2164 // de-populate the discovered reference lists. We could have,
2165 // but the only benefit would be that, when marking restarts,
2166 // fewer reference objects are discovered.
2167 return;
2168 }
2169
2170 ResourceMark rm;
2171 HandleMark hm;
2172
2173 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2174
2175 // Is alive closure.
2176 G1CMIsAliveClosure g1_is_alive(g1h);
2177
2178 // Inner scope to exclude the cleaning of the string and symbol
2179 // tables from the displayed time.
2180 {
2181 G1CMTraceTime t("GC ref-proc", G1Log::finer());
2182
2183 ReferenceProcessor* rp = g1h->ref_processor_cm();
2184
2185 // See the comment in G1CollectedHeap::ref_processing_init()
2186 // about how reference processing currently works in G1.
2187
2188 // Set the soft reference policy
2189 rp->setup_policy(clear_all_soft_refs);
2190 assert(_markStack.isEmpty(), "mark stack should be empty");
2191
2192 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2193 // in serial reference processing. Note these closures are also
2194 // used for serially processing (by the current thread) the
2195 // JNI references during parallel reference processing.
2196 //
2197 // These closures do not need to synchronize with the worker
2198 // threads involved in parallel reference processing as these
2199 // instances are executed serially by the current thread (i.e.
2200 // reference processing is not multi-threaded and is thus
2201 // performed by the current thread instead of a gang worker).
2202 //
2203 // The gang tasks involved in parallel reference processing create
2204 // their own instances of these closures, which do their own
2205 // synchronization among themselves.
2206 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2207 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2208
2209 // We need at least one active thread. If reference processing
2210 // is not multi-threaded we use the current (VMThread) thread,
2211 // otherwise we use the work gang from the G1CollectedHeap and
2212 // we utilize all the worker threads we can.
2213 bool processing_is_mt = rp->processing_is_mt();
2214 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2215 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2216
2217 // Parallel processing task executor.
2218 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2219 g1h->workers(), active_workers);
2220 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2221
2222 // Set the concurrency level. The phase was already set prior to
2223 // executing the remark task.
2224 set_concurrency(active_workers);
2225
2226 // Set the degree of MT processing here. If the discovery was done MT,
2227 // the number of threads involved during discovery could differ from
2228 // the number of active workers. This is OK as long as the discovered
2229 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2230 rp->set_active_mt_degree(active_workers);
2231
2232 // Process the weak references.
2233 const ReferenceProcessorStats& stats =
2234 rp->process_discovered_references(&g1_is_alive,
2235 &g1_keep_alive,
2236 &g1_drain_mark_stack,
2237 executor,
2238 g1h->gc_timer_cm());
2239 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2240
2241 // The do_oop work routines of the keep_alive and drain_marking_stack
2242 // oop closures will set the has_overflown flag if we overflow the
2243 // global marking stack.
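// If that flag was set, the asserts below tolerate a non-empty mark stack
// and weakRefsWork() returns early right after this scope; a global
// overflow forces the whole marking phase to be restarted.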
2244 2245 assert(_markStack.overflow() || _markStack.isEmpty(), 2246 "mark stack should be empty (unless it overflowed)"); 2247 2248 if (_markStack.overflow()) { 2249 // This should have been done already when we tried to push an 2250 // entry on to the global mark stack. But let's do it again. 2251 set_has_overflown(); 2252 } 2253 2254 assert(rp->num_q() == active_workers, "why not"); 2255 2256 rp->enqueue_discovered_references(executor); 2257 2258 rp->verify_no_references_recorded(); 2259 assert(!rp->discovery_enabled(), "Post condition"); 2260 } 2261 2262 if (has_overflown()) { 2263 // We can not trust g1_is_alive if the marking stack overflowed 2264 return; 2265 } 2266 2267 assert(_markStack.isEmpty(), "Marking should have completed"); 2268 2269 // Unload Klasses, String, Symbols, Code Cache, etc. 2270 { 2271 G1CMTraceTime trace("Unloading", G1Log::finer()); 2272 2273 if (ClassUnloadingWithConcurrentMark) { 2274 bool purged_classes; 2275 2276 { 2277 G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2278 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2279 } 2280 2281 { 2282 G1CMTraceTime trace("Parallel Unloading", G1Log::finest()); 2283 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2284 } 2285 } 2286 2287 if (G1StringDedup::is_enabled()) { 2288 G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2289 G1StringDedup::unlink(&g1_is_alive); 2290 } 2291 } 2292 } 2293 2294 void ConcurrentMark::swapMarkBitMaps() { 2295 CMBitMapRO* temp = _prevMarkBitMap; 2296 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2297 _nextMarkBitMap = (CMBitMap*) temp; 2298 } 2299 2300 // Closure for marking entries in SATB buffers. 2301 class CMSATBBufferClosure : public SATBBufferClosure { 2302 private: 2303 CMTask* _task; 2304 G1CollectedHeap* _g1h; 2305 2306 // This is very similar to CMTask::deal_with_reference, but with 2307 // more relaxed requirements for the argument, so this must be more 2308 // circumspect about treating the argument as an object. 2309 void do_entry(void* entry) const { 2310 _task->increment_refs_reached(); 2311 HeapRegion* hr = _g1h->heap_region_containing(entry); 2312 if (entry < hr->next_top_at_mark_start()) { 2313 // Until we get here, we don't know whether entry refers to a valid 2314 // object; it could instead have been a stale reference. 
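// Entries at or above NTAMS need no explicit mark: objects allocated
// since marking started are implicitly live (see the [NTAMS, top]
// discussion at FinalCountDataUpdateClosure earlier in this file).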
2315 oop obj = static_cast<oop>(entry);
2316 assert(obj->is_oop(true /* ignore mark word */),
2317 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2318 _task->make_reference_grey(obj, hr);
2319 }
2320 }
2321
2322 public:
2323 CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2324 : _task(task), _g1h(g1h) { }
2325
2326 virtual void do_buffer(void** buffer, size_t size) {
2327 for (size_t i = 0; i < size; ++i) {
2328 do_entry(buffer[i]);
2329 }
2330 }
2331 };
2332
2333 class G1RemarkThreadsClosure : public ThreadClosure {
2334 CMSATBBufferClosure _cm_satb_cl;
2335 G1CMOopClosure _cm_cl;
2336 MarkingCodeBlobClosure _code_cl;
2337 int _thread_parity;
2338
2339 public:
2340 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2341 _cm_satb_cl(task, g1h),
2342 _cm_cl(g1h, g1h->concurrent_mark(), task),
2343 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2344 _thread_parity(Threads::thread_claim_parity()) {}
2345
2346 void do_thread(Thread* thread) {
2347 if (thread->is_Java_thread()) {
2348 if (thread->claim_oops_do(true, _thread_parity)) {
2349 JavaThread* jt = (JavaThread*)thread;
2350
2351 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2352 // however, the oops reachable from nmethods have very complex lifecycles:
2353 // * Alive if on the stack of an executing method
2354 // * Weakly reachable otherwise
2355 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2356 // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2357 jt->nmethods_do(&_code_cl);
2358
2359 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2360 }
2361 } else if (thread->is_VM_thread()) {
2362 if (thread->claim_oops_do(true, _thread_parity)) {
2363 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2364 }
2365 }
2366 }
2367 };
2368
2369 class CMRemarkTask: public AbstractGangTask {
2370 private:
2371 ConcurrentMark* _cm;
2372 public:
2373 void work(uint worker_id) {
2374 // Since all available tasks are actually started, we should
2375 // only proceed if we're supposed to be active.
2376 if (worker_id < _cm->active_tasks()) {
2377 CMTask* task = _cm->task(worker_id);
2378 task->record_start_time();
2379 {
2380 ResourceMark rm;
2381 HandleMark hm;
2382
2383 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2384 Threads::threads_do(&threads_f);
2385 }
2386
2387 do {
2388 task->do_marking_step(1000000000.0 /* something very large */,
2389 true /* do_termination */,
2390 false /* is_serial */);
2391 } while (task->has_aborted() && !_cm->has_overflown());
2392 // If we overflow, then we do not want to restart. We instead
2393 // want to abort remark and do concurrent marking again.
2394 task->record_end_time();
2395 }
2396 }
2397
2398 CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2399 AbstractGangTask("Par Remark"), _cm(cm) {
2400 _cm->terminator()->reset_for_reuse(active_workers);
2401 }
2402 };
2403
2404 void ConcurrentMark::checkpointRootsFinalWork() {
2405 ResourceMark rm;
2406 HandleMark hm;
2407 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2408
2409 G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2410
2411 g1h->ensure_parsability(false);
2412
2413 // this is remark, so we'll use up all active threads
2414 uint active_workers = g1h->workers()->active_workers();
2415 set_concurrency_and_phase(active_workers, false /* concurrent */);
2416 // Leave _parallel_marking_threads at its
2417 // value originally calculated in the ConcurrentMark
2418 // constructor and pass values of the active workers
2419 // through the gang in the task.
2420
2421 {
2422 StrongRootsScope srs(active_workers);
2423
2424 CMRemarkTask remarkTask(this, active_workers);
2425 // We will start all available threads, even if we decide that the
2426 // active_workers will be fewer. The extra ones will just bail out
2427 // immediately.
2428 g1h->workers()->run_task(&remarkTask);
2429 }
2430
2431 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2432 guarantee(has_overflown() ||
2433 satb_mq_set.completed_buffers_num() == 0,
2434 "Invariant: has_overflown = %s, num buffers = %d",
2435 BOOL_TO_STR(has_overflown()),
2436 satb_mq_set.completed_buffers_num());
2437
2438 print_stats();
2439 }
2440
2441 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2442 // Note we are overriding the read-only view of the prev map here, via
2443 // the cast.
2444 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2445 }
2446
2447 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2448 _nextMarkBitMap->clearRange(mr);
2449 }
2450
2451 HeapRegion*
2452 ConcurrentMark::claim_region(uint worker_id) {
2453 // "checkpoint" the finger
2454 HeapWord* finger = _finger;
2455
2456 // _heap_end will not change underneath our feet; it only changes at
2457 // yield points.
2458 while (finger < _heap_end) {
2459 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2460
2461 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2462
2463 // Above heap_region_containing may return NULL as we always scan and claim
2464 // until the end of the heap. In this case, just jump to the next region.
2465 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2466
2467 // Is the gap between reading the finger and doing the CAS too long?
2468 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2469 if (res == finger && curr_region != NULL) {
2470 // we succeeded
2471 HeapWord* bottom = curr_region->bottom();
2472 HeapWord* limit = curr_region->next_top_at_mark_start();
2473
2474 // notice that _finger == end cannot be guaranteed here since
2475 // someone else might have moved the finger even further.
2476 assert(_finger >= end, "the finger should have moved forward");
2477
2478 if (limit > bottom) {
2479 return curr_region;
2480 } else {
2481 assert(limit == bottom,
2482 "the region limit should be at bottom");
2483 // we return NULL and the caller should try calling
2484 // claim_region() again.
2485 return NULL; 2486 } 2487 } else { 2488 assert(_finger > finger, "the finger should have moved forward"); 2489 // read it again 2490 finger = _finger; 2491 } 2492 } 2493 2494 return NULL; 2495 } 2496 2497 #ifndef PRODUCT 2498 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2499 private: 2500 G1CollectedHeap* _g1h; 2501 const char* _phase; 2502 int _info; 2503 2504 public: 2505 VerifyNoCSetOops(const char* phase, int info = -1) : 2506 _g1h(G1CollectedHeap::heap()), 2507 _phase(phase), 2508 _info(info) 2509 { } 2510 2511 void operator()(oop obj) const { 2512 guarantee(obj->is_oop(), 2513 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2514 p2i(obj), _phase, _info); 2515 guarantee(!_g1h->obj_in_cs(obj), 2516 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2517 p2i(obj), _phase, _info); 2518 } 2519 }; 2520 2521 void ConcurrentMark::verify_no_cset_oops() { 2522 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2523 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2524 return; 2525 } 2526 2527 // Verify entries on the global mark stack 2528 _markStack.iterate(VerifyNoCSetOops("Stack")); 2529 2530 // Verify entries on the task queues 2531 for (uint i = 0; i < _max_worker_id; ++i) { 2532 CMTaskQueue* queue = _task_queues->queue(i); 2533 queue->iterate(VerifyNoCSetOops("Queue", i)); 2534 } 2535 2536 // Verify the global finger 2537 HeapWord* global_finger = finger(); 2538 if (global_finger != NULL && global_finger < _heap_end) { 2539 // Since we always iterate over all regions, we might get a NULL HeapRegion 2540 // here. 2541 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2542 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2543 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2544 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2545 } 2546 2547 // Verify the task fingers 2548 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2549 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2550 CMTask* task = _tasks[i]; 2551 HeapWord* task_finger = task->finger(); 2552 if (task_finger != NULL && task_finger < _heap_end) { 2553 // See above note on the global finger verification. 2554 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2555 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2556 !task_hr->in_collection_set(), 2557 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2558 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2559 } 2560 } 2561 } 2562 #endif // PRODUCT 2563 2564 // Aggregate the counting data that was constructed concurrently 2565 // with marking. 
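// Conceptually (an illustrative summary of the loops in the closure below),
// for a region r covering the card index range [start_idx, limit_idx):
//
//   r's marked bytes     = sum, over i in [0, _max_worker_id), of
//                          count_marked_bytes_array_for(i)[r->hrm_index()]
//   global card bitmap  |= union, over i in [0, _max_worker_id), of
//                          count_card_bitmap_for(i) within [start_idx, limit_idx)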
2566 class AggregateCountDataHRClosure: public HeapRegionClosure {
2567 G1CollectedHeap* _g1h;
2568 ConcurrentMark* _cm;
2569 CardTableModRefBS* _ct_bs;
2570 BitMap* _cm_card_bm;
2571 uint _max_worker_id;
2572
2573 public:
2574 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2575 BitMap* cm_card_bm,
2576 uint max_worker_id) :
2577 _g1h(g1h), _cm(g1h->concurrent_mark()),
2578 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2579 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2580
2581 bool doHeapRegion(HeapRegion* hr) {
2582 HeapWord* start = hr->bottom();
2583 HeapWord* limit = hr->next_top_at_mark_start();
2584 HeapWord* end = hr->end();
2585
2586 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2587 "Preconditions not met - "
2588 "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2589 "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2590 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2591
2592 assert(hr->next_marked_bytes() == 0, "Precondition");
2593
2594 if (start == limit) {
2595 // NTAMS of this region has not been set so nothing to do.
2596 return false;
2597 }
2598
2599 // 'start' should be in the heap.
2600 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2601 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2602 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2603
2604 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2605 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2606 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2607
2608 // If ntams is not card aligned then we bump the card bitmap index
2609 // for limit so that we get all the cards spanned by
2610 // the object ending at ntams.
2611 // Note: if this is the last region in the heap then ntams
2612 // could actually be just beyond the end of the heap;
2613 // limit_idx will then correspond to a (non-existent) card
2614 // that is also outside the heap.
2615 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2616 limit_idx += 1;
2617 }
2618
2619 assert(limit_idx <= end_idx, "or else use atomics");
2620
2621 // Aggregate the "stripe" in the count data associated with hr.
2622 uint hrm_index = hr->hrm_index();
2623 size_t marked_bytes = 0;
2624
2625 for (uint i = 0; i < _max_worker_id; i += 1) {
2626 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2627 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2628
2629 // Fetch the marked_bytes in this region for task i and
2630 // add it to the running total for this region.
2631 marked_bytes += marked_bytes_array[hrm_index];
2632
2633 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2634 // into the global card bitmap.
2635 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2636
2637 while (scan_idx < limit_idx) {
2638 assert(task_card_bm->at(scan_idx) == true, "should be");
2639 _cm_card_bm->set_bit(scan_idx);
2640 assert(_cm_card_bm->at(scan_idx) == true, "should be");
2641
2642 // BitMap::get_next_one_offset() can handle the case when
2643 // its left_offset parameter is greater than its right_offset
2644 // parameter. It does, however, have an early exit if
2645 // left_offset == right_offset. So let's limit the value
2646 // passed in for left offset here.
2647 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2648 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2649 } 2650 } 2651 2652 // Update the marked bytes for this region. 2653 hr->add_to_marked_bytes(marked_bytes); 2654 2655 // Next heap region 2656 return false; 2657 } 2658 }; 2659 2660 class G1AggregateCountDataTask: public AbstractGangTask { 2661 protected: 2662 G1CollectedHeap* _g1h; 2663 ConcurrentMark* _cm; 2664 BitMap* _cm_card_bm; 2665 uint _max_worker_id; 2666 uint _active_workers; 2667 HeapRegionClaimer _hrclaimer; 2668 2669 public: 2670 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2671 ConcurrentMark* cm, 2672 BitMap* cm_card_bm, 2673 uint max_worker_id, 2674 uint n_workers) : 2675 AbstractGangTask("Count Aggregation"), 2676 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2677 _max_worker_id(max_worker_id), 2678 _active_workers(n_workers), 2679 _hrclaimer(_active_workers) { 2680 } 2681 2682 void work(uint worker_id) { 2683 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2684 2685 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2686 } 2687 }; 2688 2689 2690 void ConcurrentMark::aggregate_count_data() { 2691 uint n_workers = _g1h->workers()->active_workers(); 2692 2693 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2694 _max_worker_id, n_workers); 2695 2696 _g1h->workers()->run_task(&g1_par_agg_task); 2697 } 2698 2699 // Clear the per-worker arrays used to store the per-region counting data 2700 void ConcurrentMark::clear_all_count_data() { 2701 // Clear the global card bitmap - it will be filled during 2702 // liveness count aggregation (during remark) and the 2703 // final counting task. 2704 _card_bm.clear(); 2705 2706 // Clear the global region bitmap - it will be filled as part 2707 // of the final counting task. 2708 _region_bm.clear(); 2709 2710 uint max_regions = _g1h->max_regions(); 2711 assert(_max_worker_id > 0, "uninitialized"); 2712 2713 for (uint i = 0; i < _max_worker_id; i += 1) { 2714 BitMap* task_card_bm = count_card_bitmap_for(i); 2715 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2716 2717 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2718 assert(marked_bytes_array != NULL, "uninitialized"); 2719 2720 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2721 task_card_bm->clear(); 2722 } 2723 } 2724 2725 void ConcurrentMark::print_stats() { 2726 if (G1MarkingVerboseLevel > 0) { 2727 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2728 for (size_t i = 0; i < _active_tasks; ++i) { 2729 _tasks[i]->print_stats(); 2730 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2731 } 2732 } 2733 } 2734 2735 // abandon current marking iteration due to a Full GC 2736 void ConcurrentMark::abort() { 2737 if (!cmThread()->during_cycle() || _has_aborted) { 2738 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2739 return; 2740 } 2741 2742 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2743 // concurrent bitmap clearing. 2744 _nextMarkBitMap->clearAll(); 2745 2746 // Note we cannot clear the previous marking bitmap here 2747 // since VerifyDuringGC verifies the objects marked during 2748 // a full GC against the previous bitmap. 
2749 2750 // Clear the liveness counting data 2751 clear_all_count_data(); 2752 // Empty mark stack 2753 reset_marking_state(); 2754 for (uint i = 0; i < _max_worker_id; ++i) { 2755 _tasks[i]->clear_region_fields(); 2756 } 2757 _first_overflow_barrier_sync.abort(); 2758 _second_overflow_barrier_sync.abort(); 2759 _has_aborted = true; 2760 2761 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2762 satb_mq_set.abandon_partial_marking(); 2763 // This can be called either during or outside marking, we'll read 2764 // the expected_active value from the SATB queue set. 2765 satb_mq_set.set_active_all_threads( 2766 false, /* new active value */ 2767 satb_mq_set.is_active() /* expected_active */); 2768 2769 _g1h->trace_heap_after_concurrent_cycle(); 2770 _g1h->register_concurrent_cycle_end(); 2771 } 2772 2773 static void print_ms_time_info(const char* prefix, const char* name, 2774 NumberSeq& ns) { 2775 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2776 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2777 if (ns.num() > 0) { 2778 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2779 prefix, ns.sd(), ns.maximum()); 2780 } 2781 } 2782 2783 void ConcurrentMark::print_summary_info() { 2784 gclog_or_tty->print_cr(" Concurrent marking:"); 2785 print_ms_time_info(" ", "init marks", _init_times); 2786 print_ms_time_info(" ", "remarks", _remark_times); 2787 { 2788 print_ms_time_info(" ", "final marks", _remark_mark_times); 2789 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2790 2791 } 2792 print_ms_time_info(" ", "cleanups", _cleanup_times); 2793 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2794 _total_counting_time, 2795 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 2796 (double)_cleanup_times.num() 2797 : 0.0)); 2798 if (G1ScrubRemSets) { 2799 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2800 _total_rs_scrub_time, 2801 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 2802 (double)_cleanup_times.num() 2803 : 0.0)); 2804 } 2805 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 2806 (_init_times.sum() + _remark_times.sum() + 2807 _cleanup_times.sum())/1000.0); 2808 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 2809 "(%8.2f s marking).", 2810 cmThread()->vtime_accum(), 2811 cmThread()->vtime_mark_accum()); 2812 } 2813 2814 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2815 _parallel_workers->print_worker_threads_on(st); 2816 } 2817 2818 void ConcurrentMark::print_on_error(outputStream* st) const { 2819 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2820 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2821 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2822 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2823 } 2824 2825 // We take a break if someone is trying to stop the world. 
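// A typical call site looks like the following (an illustrative sketch;
// the real callers live in the concurrent marking task code):
//
//   while (/* work remains */) {
//     // ... do a bounded chunk of marking work ...
//     if (cm->do_yield_check(worker_id)) {
//       // We yielded for a safepoint; any cached marking state
//       // may be stale at this point.
//     }
//   }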
2826 bool ConcurrentMark::do_yield_check(uint worker_id) { 2827 if (SuspendibleThreadSet::should_yield()) { 2828 if (worker_id == 0) { 2829 _g1h->g1_policy()->record_concurrent_pause(); 2830 } 2831 SuspendibleThreadSet::yield(); 2832 return true; 2833 } else { 2834 return false; 2835 } 2836 } 2837 2838 // Closure for iteration over bitmaps 2839 class CMBitMapClosure : public BitMapClosure { 2840 private: 2841 // the bitmap that is being iterated over 2842 CMBitMap* _nextMarkBitMap; 2843 ConcurrentMark* _cm; 2844 CMTask* _task; 2845 2846 public: 2847 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 2848 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2849 2850 bool do_bit(size_t offset) { 2851 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2852 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2853 assert( addr < _cm->finger(), "invariant"); 2854 assert(addr >= _task->finger(), "invariant"); 2855 2856 // We move that task's local finger along. 2857 _task->move_finger_to(addr); 2858 2859 _task->scan_object(oop(addr)); 2860 // we only partially drain the local queue and global stack 2861 _task->drain_local_queue(true); 2862 _task->drain_global_stack(true); 2863 2864 // if the has_aborted flag has been raised, we need to bail out of 2865 // the iteration 2866 return !_task->has_aborted(); 2867 } 2868 }; 2869 2870 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2871 ReferenceProcessor* result = NULL; 2872 if (G1UseConcMarkReferenceProcessing) { 2873 result = g1h->ref_processor_cm(); 2874 assert(result != NULL, "should not be NULL"); 2875 } 2876 return result; 2877 } 2878 2879 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2880 ConcurrentMark* cm, 2881 CMTask* task) 2882 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2883 _g1h(g1h), _cm(cm), _task(task) 2884 { } 2885 2886 void CMTask::setup_for_region(HeapRegion* hr) { 2887 assert(hr != NULL, 2888 "claim_region() should have filtered out NULL regions"); 2889 _curr_region = hr; 2890 _finger = hr->bottom(); 2891 update_region_limit(); 2892 } 2893 2894 void CMTask::update_region_limit() { 2895 HeapRegion* hr = _curr_region; 2896 HeapWord* bottom = hr->bottom(); 2897 HeapWord* limit = hr->next_top_at_mark_start(); 2898 2899 if (limit == bottom) { 2900 // The region was collected underneath our feet. 2901 // We set the finger to bottom to ensure that the bitmap 2902 // iteration that will follow this will not do anything. 2903 // (this is not a condition that holds when we set the region up, 2904 // as the region is not supposed to be empty in the first place) 2905 _finger = bottom; 2906 } else if (limit >= _region_limit) { 2907 assert(limit >= _finger, "peace of mind"); 2908 } else { 2909 assert(limit < _region_limit, "only way to get here"); 2910 // This can happen under some pretty unusual circumstances. An 2911 // evacuation pause empties the region underneath our feet (NTAMS 2912 // at bottom). We then do some allocation in the region (NTAMS 2913 // stays at bottom), followed by the region being used as a GC 2914 // alloc region (NTAMS will move to top() and the objects 2915 // originally below it will be grayed). All objects now marked in 2916 // the region are explicitly grayed, if below the global finger, 2917 // and we do not need in fact to scan anything else. So, we simply 2918 // set _finger to be limit to ensure that the bitmap iteration 2919 // doesn't do anything. 
2920 _finger = limit;
2921 }
2922
2923 _region_limit = limit;
2924 }
2925
2926 void CMTask::giveup_current_region() {
2927 assert(_curr_region != NULL, "invariant");
2928 clear_region_fields();
2929 }
2930
2931 void CMTask::clear_region_fields() {
2932 // Values for these three fields that indicate that we're not
2933 // holding on to a region.
2934 _curr_region = NULL;
2935 _finger = NULL;
2936 _region_limit = NULL;
2937 }
2938
2939 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2940 if (cm_oop_closure == NULL) {
2941 assert(_cm_oop_closure != NULL, "invariant");
2942 } else {
2943 assert(_cm_oop_closure == NULL, "invariant");
2944 }
2945 _cm_oop_closure = cm_oop_closure;
2946 }
2947
2948 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2949 guarantee(nextMarkBitMap != NULL, "invariant");
2950 _nextMarkBitMap = nextMarkBitMap;
2951 clear_region_fields();
2952
2953 _calls = 0;
2954 _elapsed_time_ms = 0.0;
2955 _termination_time_ms = 0.0;
2956 _termination_start_time_ms = 0.0;
2957 }
2958
2959 bool CMTask::should_exit_termination() {
2960 regular_clock_call();
2961 // This is called when we are in the termination protocol. We should
2962 // quit if, for some reason, this task wants to abort or the global
2963 // stack is not empty (this means that we can get work from it).
2964 return !_cm->mark_stack_empty() || has_aborted();
2965 }
2966
2967 void CMTask::reached_limit() {
2968 assert(_words_scanned >= _words_scanned_limit ||
2969 _refs_reached >= _refs_reached_limit ,
2970 "shouldn't have been called otherwise");
2971 regular_clock_call();
2972 }
2973
2974 void CMTask::regular_clock_call() {
2975 if (has_aborted()) return;
2976
2977 // First, we need to recalculate the words scanned and refs reached
2978 // limits for the next clock call.
2979 recalculate_limits();
2980
2981 // During the regular clock call we do the following
2982
2983 // (1) If an overflow has been flagged, then we abort.
2984 if (_cm->has_overflown()) {
2985 set_has_aborted();
2986 return;
2987 }
2988
2989 // If we are not concurrent (i.e. we're doing remark) we don't need
2990 // to check anything else. The other steps are only needed during
2991 // the concurrent marking phase.
2992 if (!concurrent()) return;
2993
2994 // (2) If marking has been aborted for Full GC, then we also abort.
2995 if (_cm->has_aborted()) {
2996 set_has_aborted();
2997 return;
2998 }
2999
3000 double curr_time_ms = os::elapsedVTime() * 1000.0;
3001
3002 // (3) We check whether we should yield. If we have to, then we abort.
3003 if (SuspendibleThreadSet::should_yield()) {
3004 // We should yield. To do this we abort the task. The caller is
3005 // responsible for yielding.
3006 set_has_aborted();
3007 return;
3008 }
3009
3010 // (4) We check whether we've reached our time quota. If we have,
3011 // then we abort.
3012 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3013 if (elapsed_time_ms > _time_target_ms) {
3014 set_has_aborted();
3015 _has_timed_out = true;
3016 return;
3017 }
3018
3019 // (5) Finally, we check whether there are enough completed SATB
3020 // buffers available for processing. If there are, we abort.
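// (Those buffers are then drained at the start of the next
// do_marking_step() invocation; see drain_satb_buffers() and the
// comments in do_marking_step() below.)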
3021 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3022 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3023 // We do need to process SATB buffers, so we'll abort and restart
3024 // the marking task to do so.
3025 set_has_aborted();
3026 return;
3027 }
3028 }
3029
3030 void CMTask::recalculate_limits() {
3031 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3032 _words_scanned_limit = _real_words_scanned_limit;
3033
3034 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3035 _refs_reached_limit = _real_refs_reached_limit;
3036 }
3037
3038 void CMTask::decrease_limits() {
3039 // This is called when we believe that we're going to do an infrequent
3040 // operation which will increase the per byte scanned cost (i.e. move
3041 // entries to/from the global stack). It basically tries to decrease the
3042 // scanning limit so that the clock is called earlier.
3043
3044 _words_scanned_limit = _real_words_scanned_limit -
3045 3 * words_scanned_period / 4;
3046 _refs_reached_limit = _real_refs_reached_limit -
3047 3 * refs_reached_period / 4;
3048 }
3049
3050 void CMTask::move_entries_to_global_stack() {
3051 // local array where we'll store the entries that will be popped
3052 // from the local queue
3053 oop buffer[global_stack_transfer_size];
3054
3055 int n = 0;
3056 oop obj;
3057 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3058 buffer[n] = obj;
3059 ++n;
3060 }
3061
3062 if (n > 0) {
3063 // we popped at least one entry from the local queue
3064
3065 if (!_cm->mark_stack_push(buffer, n)) {
3066 set_has_aborted();
3067 }
3068 }
3069
3070 // this operation was quite expensive, so decrease the limits
3071 decrease_limits();
3072 }
3073
3074 void CMTask::get_entries_from_global_stack() {
3075 // local array where we'll store the entries that will be popped
3076 // from the global stack.
3077 oop buffer[global_stack_transfer_size];
3078 int n;
3079 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3080 assert(n <= global_stack_transfer_size,
3081 "we should not pop more than the given limit");
3082 if (n > 0) {
3083 // yes, we did actually pop at least one entry
3084 for (int i = 0; i < n; ++i) {
3085 bool success = _task_queue->push(buffer[i]);
3086 // We only call this when the local queue is empty or under a
3087 // given target limit. So, we do not expect this push to fail.
3088 assert(success, "invariant");
3089 }
3090 }
3091
3092 // this operation was quite expensive, so decrease the limits
3093 decrease_limits();
3094 }
3095
3096 void CMTask::drain_local_queue(bool partially) {
3097 if (has_aborted()) return;
3098
3099 // Decide what the target size is, depending on whether we're going to
3100 // drain it partially (so that other tasks can steal if they run out
3101 // of things to do) or totally (at the very end).
3102 size_t target_size;
3103 if (partially) {
3104 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3105 } else {
3106 target_size = 0;
3107 }
3108
3109 if (_task_queue->size() > target_size) {
3110 oop obj;
3111 bool ret = _task_queue->pop_local(obj);
3112 while (ret) {
3113 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3114 assert(!_g1h->is_on_master_free_list(
3115 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3116
3117 scan_object(obj);
3118
3119 if (_task_queue->size() <= target_size || has_aborted()) {
3120 ret = false;
3121 } else {
3122 ret = _task_queue->pop_local(obj);
3123 }
3124 }
3125 }
3126 }
3127
3128 void CMTask::drain_global_stack(bool partially) {
3129 if (has_aborted()) return;
3130
3131 // We have a policy to drain the local queue before we attempt to
3132 // drain the global stack.
3133 assert(partially || _task_queue->size() == 0, "invariant");
3134
3135 // Decide what the target size is, depending on whether we're going to
3136 // drain it partially (so that other tasks can steal if they run out
3137 // of things to do) or totally (at the very end). Notice that,
3138 // because we move entries from the global stack in chunks or
3139 // because another task might be doing the same, we might in fact
3140 // drop below the target. But, this is not a problem.
3141 size_t target_size;
3142 if (partially) {
3143 target_size = _cm->partial_mark_stack_size_target();
3144 } else {
3145 target_size = 0;
3146 }
3147
3148 if (_cm->mark_stack_size() > target_size) {
3149 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3150 get_entries_from_global_stack();
3151 drain_local_queue(partially);
3152 }
3153 }
3154 }
3155
3156 // The SATB queue code makes several assumptions about whether to call the
3157 // par or non-par versions of the methods. This is why some of the code is
3158 // replicated. We should really get rid of the single-threaded version
3159 // of the code to simplify things.
3160 void CMTask::drain_satb_buffers() {
3161 if (has_aborted()) return;
3162
3163 // We set this so that the regular clock knows that we're in the
3164 // middle of draining buffers and doesn't set the abort flag when it
3165 // notices that SATB buffers are available for draining. It'd be
3166 // very counterproductive if it did that. :-)
3167 _draining_satb_buffers = true;
3168
3169 CMSATBBufferClosure satb_cl(this, _g1h);
3170 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3171
3172 // This keeps claiming and applying the closure to completed buffers
3173 // until we run out of buffers or we need to abort.
3174 while (!has_aborted() && 3175 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 3176 regular_clock_call(); 3177 } 3178 3179 _draining_satb_buffers = false; 3180 3181 assert(has_aborted() || 3182 concurrent() || 3183 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3184 3185 // again, this was a potentially expensive operation, decrease the 3186 // limits to get the regular clock call early 3187 decrease_limits(); 3188 } 3189 3190 void CMTask::print_stats() { 3191 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3192 _worker_id, _calls); 3193 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3194 _elapsed_time_ms, _termination_time_ms); 3195 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3196 _step_times_ms.num(), _step_times_ms.avg(), 3197 _step_times_ms.sd()); 3198 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3199 _step_times_ms.maximum(), _step_times_ms.sum()); 3200 } 3201 3202 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) { 3203 return _task_queues->steal(worker_id, hash_seed, obj); 3204 } 3205 3206 /***************************************************************************** 3207 3208 The do_marking_step(time_target_ms, ...) method is the building 3209 block of the parallel marking framework. It can be called in parallel 3210 with other invocations of do_marking_step() on different tasks 3211 (but only one per task, obviously) and concurrently with the 3212 mutator threads, or during remark, hence it eliminates the need 3213 for two versions of the code. When called during remark, it will 3214 pick up from where the task left off during the concurrent marking 3215 phase. Interestingly, tasks are also claimable during evacuation 3216 pauses too, since do_marking_step() ensures that it aborts before 3217 it needs to yield. 3218 3219 The data structures that it uses to do marking work are the 3220 following: 3221 3222 (1) Marking Bitmap. If there are gray objects that appear only 3223 on the bitmap (this happens either when dealing with an overflow 3224 or when the initial marking phase has simply marked the roots 3225 and didn't push them on the stack), then tasks claim heap 3226 regions whose bitmap they then scan to find gray objects. A 3227 global finger indicates where the end of the last claimed region 3228 is. A local finger indicates how far into the region a task has 3229 scanned. The two fingers are used to determine how to gray an 3230 object (i.e. whether simply marking it is OK, as it will be 3231 visited by a task in the future, or whether it needs to be also 3232 pushed on a stack). 3233 3234 (2) Local Queue. The local queue of the task which is accessed 3235 reasonably efficiently by the task. Other tasks can steal from 3236 it when they run out of work. Throughout the marking phase, a 3237 task attempts to keep its local queue short but not totally 3238 empty, so that entries are available for stealing by other 3239 tasks. Only when there is no more work, a task will totally 3240 drain its local queue. 3241 3242 (3) Global Mark Stack. This handles local queue overflow. During 3243 marking only sets of entries are moved between it and the local 3244 queues, as access to it requires a mutex and more fine-grain 3245 interaction with it which might cause contention. If it 3246 overflows, then the marking phase should restart and iterate 3247 over the bitmap to identify gray objects. 
    Throughout the marking phase, tasks attempt to keep the global
    mark stack at a small length but not totally empty, so that
    entries are available for popping by other tasks. Only when there
    is no more work will tasks totally drain the global mark stack.

    (4) SATB Buffer Queue. This is where completed SATB buffers are
    made available. Buffers are regularly removed from this queue
    and scanned for roots, so that the queue doesn't get too
    long. During remark, all completed buffers are processed, as
    well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

    (1) When the marking phase has been aborted (after a Full GC).

    (2) When a global overflow (on the global stack) has been
    triggered. Before the task aborts, it will actually sync up with
    the other tasks to ensure that all the marking data structures
    (local queues, stacks, fingers etc.) are re-initialized so that
    when do_marking_step() completes, the marking phase can
    immediately restart.

    (3) When enough completed SATB buffers are available. The
    do_marking_step() method only tries to drain SATB buffers right
    at the beginning. So, if enough buffers are available, the
    marking step aborts and the SATB buffers are processed at
    the beginning of the next invocation.

    (4) To yield. When we have to yield, we abort and yield
    right at the end of do_marking_step(). This saves us from a lot
    of hassle as, by yielding, we might allow a Full GC. If this
    happens then objects will be compacted underneath our feet, the
    heap might shrink, etc. We save checking for this by just
    aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: it is called when the number of object
    words the marking phase has scanned or the number of references
    the marking phase has visited reaches a given limit. Additional
    invocations of the clock method have been planted in a few other
    strategic places too. The initial reason for the clock method was
    to avoid calling vtime too regularly, as it is quite expensive. So,
    once it was in place, it was natural to piggy-back all the other
    conditions on it too and not constantly check them throughout the
    code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.
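
    For illustration, a serial caller might drive a task roughly as
    follows (a simplified sketch, not a verbatim copy of the remark
    code; the huge time target effectively disables the clock-based
    abort):

      task->do_marking_step(1000000000.0 /* "unbounded" time target */,
                            true         /* do_termination */,
                            true         /* is_serial */);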

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // Set up the variables that are used in the work-based scheme to
  // call the regular clock method.
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // Clear all flags.
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack.
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
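      // (Recall from the notes above: the local finger, _finger, tracks
      // how far into _curr_region this task has scanned, while the
      // global finger in ConcurrentMark tracks the region-claiming
      // boundary.)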
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure.
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object.
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region == NULL, "invariant");
      assert(_finger == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one.
        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");
    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");
    while (!has_aborted()) {
      oop obj;
      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
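  // (When is_serial is true the protocol below degenerates to pure
  // bookkeeping: offer_termination() is never called, so "finished" is
  // immediately true.)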
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // Let's allow task 0 to do this.
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // We need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task. It
      // will be restarted and we can hopefully find more things to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.
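
      // The shape of the protocol: after the first barrier every task
      // has stopped marking; each task then resets its local state; the
      // second barrier is crossed once that re-initialization is
      // complete everywhere.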

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context.
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT   " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_BYTE_FORMAT   " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT   " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
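  // For illustration, the header produced below looks roughly like this
  // (exact column widths depend on the macros above and on _LP64):
  //
  //   ### PHASE <phase_name> @ 12.345
  //   ### HEAP reserved: <start>-<end> region-size: <bytes>
  //   ###
  //   ### type address-range used prev-live next-live gc-eff remset code-roots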
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff",
                 "remset", "code-roots");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                 "(bytes)", "(bytes)");
}

// Takes as a parameter a reference to one of the _hum_* fields, deduces
// the corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field is < the
// region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// Deduces the values for a region in a humongous region series from
// the _hum_* fields and updates those accordingly. It assumes that
// the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = r->get_type_str();
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  size_t remset_bytes = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
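    // The "starts humongous" region charges itself one region's worth of
    // each statistic via get_hum_bytes() below; the remainder is handed
    // out to the "continues humongous" regions as they are visited in
    // address order.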
    _hum_capacity_bytes = capacity_bytes;
    _hum_used_bytes = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes += used_bytes;
  _total_capacity_bytes += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, p2i(bottom), p2i(end),
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                 remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usage to the remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset")
                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes),
                 bytes_to_mb(_total_strong_code_roots_bytes));
  _out->cr();
}
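
// For illustration, the summary footer printed above has roughly this
// shape (values are made up):
//
//   ### SUMMARY capacity: 2048.00 MB used: 1234.56 MB / 60.28 % ...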