1 /* 2 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/metadataOnStackMark.hpp" 27 #include "classfile/symbolTable.hpp" 28 #include "code/codeCache.hpp" 29 #include "gc/g1/concurrentMarkThread.inline.hpp" 30 #include "gc/g1/g1CollectedHeap.inline.hpp" 31 #include "gc/g1/g1CollectorPolicy.hpp" 32 #include "gc/g1/g1CollectorState.hpp" 33 #include "gc/g1/g1ConcurrentMark.inline.hpp" 34 #include "gc/g1/g1HeapVerifier.hpp" 35 #include "gc/g1/g1OopClosures.inline.hpp" 36 #include "gc/g1/g1StringDedup.hpp" 37 #include "gc/g1/heapRegion.inline.hpp" 38 #include "gc/g1/heapRegionRemSet.hpp" 39 #include "gc/g1/heapRegionSet.inline.hpp" 40 #include "gc/g1/suspendibleThreadSet.hpp" 41 #include "gc/shared/gcId.hpp" 42 #include "gc/shared/gcTimer.hpp" 43 #include "gc/shared/gcTrace.hpp" 44 #include "gc/shared/gcTraceTime.inline.hpp" 45 #include "gc/shared/genOopClosures.inline.hpp" 46 #include "gc/shared/referencePolicy.hpp" 47 #include "gc/shared/strongRootsScope.hpp" 48 #include "gc/shared/taskqueue.inline.hpp" 49 #include "gc/shared/vmGCOperations.hpp" 50 #include "logging/log.hpp" 51 #include "memory/allocation.hpp" 52 #include "memory/resourceArea.hpp" 53 #include "oops/oop.inline.hpp" 54 #include "runtime/atomic.inline.hpp" 55 #include "runtime/handles.inline.hpp" 56 #include "runtime/java.hpp" 57 #include "runtime/prefetch.inline.hpp" 58 #include "services/memTracker.hpp" 59 60 // Concurrent marking bit map wrapper 61 62 G1CMBitMapRO::G1CMBitMapRO(int shifter) : 63 _bm(), 64 _shifter(shifter) { 65 _bmStartWord = 0; 66 _bmWordSize = 0; 67 } 68 69 HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr, 70 const HeapWord* limit) const { 71 // First we must round addr *up* to a possible object boundary. 
72 addr = (HeapWord*)align_size_up((intptr_t)addr, 73 HeapWordSize << _shifter); 74 size_t addrOffset = heapWordToOffset(addr); 75 assert(limit != NULL, "limit must not be NULL"); 76 size_t limitOffset = heapWordToOffset(limit); 77 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 78 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 79 assert(nextAddr >= addr, "get_next_one postcondition"); 80 assert(nextAddr == limit || isMarked(nextAddr), 81 "get_next_one postcondition"); 82 return nextAddr; 83 } 84 85 #ifndef PRODUCT 86 bool G1CMBitMapRO::covers(MemRegion heap_rs) const { 87 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 88 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize, 89 "size inconsistency"); 90 return _bmStartWord == (HeapWord*)(heap_rs.start()) && 91 _bmWordSize == heap_rs.word_size(); 92 } 93 #endif 94 95 void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const { 96 _bm.print_on_error(st, prefix); 97 } 98 99 size_t G1CMBitMap::compute_size(size_t heap_size) { 100 return ReservedSpace::allocation_align_size_up(heap_size / mark_distance()); 101 } 102 103 size_t G1CMBitMap::mark_distance() { 104 return MinObjAlignmentInBytes * BitsPerByte; 105 } 106 107 void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) { 108 _bmStartWord = heap.start(); 109 _bmWordSize = heap.word_size(); 110 111 _bm.set_map((BitMap::bm_word_t*) storage->reserved().start()); 112 _bm.set_size(_bmWordSize >> _shifter); 113 114 storage->set_mapping_changed_listener(&_listener); 115 } 116 117 void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) { 118 if (zero_filled) { 119 return; 120 } 121 // We need to clear the bitmap on commit, removing any existing information. 
122 MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords); 123 _bm->clearRange(mr); 124 } 125 126 // Closure used for clearing the given mark bitmap. 127 class ClearBitmapHRClosure : public HeapRegionClosure { 128 private: 129 G1ConcurrentMark* _cm; 130 G1CMBitMap* _bitmap; 131 bool _may_yield; // The closure may yield during iteration. If yielded, abort the iteration. 132 public: 133 ClearBitmapHRClosure(G1ConcurrentMark* cm, G1CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) { 134 assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield."); 135 } 136 137 virtual bool doHeapRegion(HeapRegion* r) { 138 size_t const chunk_size_in_words = M / HeapWordSize; 139 140 HeapWord* cur = r->bottom(); 141 HeapWord* const end = r->end(); 142 143 while (cur < end) { 144 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end)); 145 _bitmap->clearRange(mr); 146 147 cur += chunk_size_in_words; 148 149 // Abort iteration if after yielding the marking has been aborted. 150 if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) { 151 return true; 152 } 153 // Repeat the asserts from before the start of the closure. We will do them 154 // as asserts here to minimize their overhead on the product. However, we 155 // will have them as guarantees at the beginning / end of the bitmap 156 // clearing to get some checking in the product. 157 assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant"); 158 assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant"); 159 } 160 161 return false; 162 } 163 }; 164 165 class ParClearNextMarkBitmapTask : public AbstractGangTask { 166 ClearBitmapHRClosure* _cl; 167 HeapRegionClaimer _hrclaimer; 168 bool _suspendible; // If the task is suspendible, workers must join the STS. 
169 170 public: 171 ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) : 172 _cl(cl), _suspendible(suspendible), AbstractGangTask("Parallel Clear Bitmap Task"), _hrclaimer(n_workers) {} 173 174 void work(uint worker_id) { 175 SuspendibleThreadSetJoiner sts_join(_suspendible); 176 G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true); 177 } 178 }; 179 180 void G1CMBitMap::clearAll() { 181 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 182 ClearBitmapHRClosure cl(NULL, this, false /* may_yield */); 183 uint n_workers = g1h->workers()->active_workers(); 184 ParClearNextMarkBitmapTask task(&cl, n_workers, false); 185 g1h->workers()->run_task(&task); 186 guarantee(cl.complete(), "Must have completed iteration."); 187 return; 188 } 189 190 void G1CMBitMap::clearRange(MemRegion mr) { 191 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 192 assert(!mr.is_empty(), "unexpected empty region"); 193 // convert address range into offset range 194 _bm.at_put_range(heapWordToOffset(mr.start()), 195 heapWordToOffset(mr.end()), false); 196 } 197 198 G1CMMarkStack::G1CMMarkStack(G1ConcurrentMark* cm) : 199 _base(NULL), _cm(cm) 200 {} 201 202 bool G1CMMarkStack::allocate(size_t capacity) { 203 // allocate a stack of the requisite depth 204 ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop))); 205 if (!rs.is_reserved()) { 206 warning("ConcurrentMark MarkStack allocation failure"); 207 return false; 208 } 209 MemTracker::record_virtual_memory_type((address)rs.base(), mtGC); 210 if (!_virtual_space.initialize(rs, rs.size())) { 211 warning("ConcurrentMark MarkStack backing store failure"); 212 // Release the virtual memory reserved for the marking stack 213 rs.release(); 214 return false; 215 } 216 assert(_virtual_space.committed_size() == rs.size(), 217 "Didn't reserve backing store for all of G1ConcurrentMark stack?"); 218 _base = (oop*) _virtual_space.low(); 219 setEmpty(); 220 
_capacity = (jint) capacity; 221 _saved_index = -1; 222 _should_expand = false; 223 return true; 224 } 225 226 void G1CMMarkStack::expand() { 227 // Called, during remark, if we've overflown the marking stack during marking. 228 assert(isEmpty(), "stack should been emptied while handling overflow"); 229 assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted"); 230 // Clear expansion flag 231 _should_expand = false; 232 if (_capacity == (jint) MarkStackSizeMax) { 233 log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit"); 234 return; 235 } 236 // Double capacity if possible 237 jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax); 238 // Do not give up existing stack until we have managed to 239 // get the double capacity that we desired. 240 ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity * 241 sizeof(oop))); 242 if (rs.is_reserved()) { 243 // Release the backing store associated with old stack 244 _virtual_space.release(); 245 // Reinitialize virtual space for new stack 246 if (!_virtual_space.initialize(rs, rs.size())) { 247 fatal("Not enough swap for expanded marking stack capacity"); 248 } 249 _base = (oop*)(_virtual_space.low()); 250 _index = 0; 251 _capacity = new_capacity; 252 } else { 253 // Failed to double capacity, continue; 254 log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K", 255 _capacity / K, new_capacity / K); 256 } 257 } 258 259 void G1CMMarkStack::set_should_expand() { 260 // If we're resetting the marking state because of an 261 // marking stack overflow, record that we should, if 262 // possible, expand the stack. 
263 _should_expand = _cm->has_overflown(); 264 } 265 266 G1CMMarkStack::~G1CMMarkStack() { 267 if (_base != NULL) { 268 _base = NULL; 269 _virtual_space.release(); 270 } 271 } 272 273 void G1CMMarkStack::par_push_arr(oop* ptr_arr, int n) { 274 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 275 jint start = _index; 276 jint next_index = start + n; 277 if (next_index > _capacity) { 278 _overflow = true; 279 return; 280 } 281 // Otherwise. 282 _index = next_index; 283 for (int i = 0; i < n; i++) { 284 int ind = start + i; 285 assert(ind < _capacity, "By overflow test above."); 286 _base[ind] = ptr_arr[i]; 287 } 288 } 289 290 bool G1CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { 291 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 292 jint index = _index; 293 if (index == 0) { 294 *n = 0; 295 return false; 296 } else { 297 int k = MIN2(max, index); 298 jint new_ind = index - k; 299 for (int j = 0; j < k; j++) { 300 ptr_arr[j] = _base[new_ind + j]; 301 } 302 _index = new_ind; 303 *n = k; 304 return true; 305 } 306 } 307 308 void G1CMMarkStack::note_start_of_gc() { 309 assert(_saved_index == -1, 310 "note_start_of_gc()/end_of_gc() bracketed incorrectly"); 311 _saved_index = _index; 312 } 313 314 void G1CMMarkStack::note_end_of_gc() { 315 // This is intentionally a guarantee, instead of an assert. If we 316 // accidentally add something to the mark stack during GC, it 317 // will be a correctness issue so it's better if we crash. we'll 318 // only check this once per GC anyway, so it won't be a performance 319 // issue in any way. 
320 guarantee(_saved_index == _index, 321 "saved index: %d index: %d", _saved_index, _index); 322 _saved_index = -1; 323 } 324 325 G1CMRootRegions::G1CMRootRegions() : 326 _young_list(NULL), _cm(NULL), _scan_in_progress(false), 327 _should_abort(false), _next_survivor(NULL) { } 328 329 void G1CMRootRegions::init(G1CollectedHeap* g1h, G1ConcurrentMark* cm) { 330 _young_list = g1h->young_list(); 331 _cm = cm; 332 } 333 334 void G1CMRootRegions::prepare_for_scan() { 335 assert(!scan_in_progress(), "pre-condition"); 336 337 // Currently, only survivors can be root regions. 338 assert(_next_survivor == NULL, "pre-condition"); 339 _next_survivor = _young_list->first_survivor_region(); 340 _scan_in_progress = (_next_survivor != NULL); 341 _should_abort = false; 342 } 343 344 HeapRegion* G1CMRootRegions::claim_next() { 345 if (_should_abort) { 346 // If someone has set the should_abort flag, we return NULL to 347 // force the caller to bail out of their loop. 348 return NULL; 349 } 350 351 // Currently, only survivors can be root regions. 352 HeapRegion* res = _next_survivor; 353 if (res != NULL) { 354 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 355 // Read it again in case it changed while we were waiting for the lock. 356 res = _next_survivor; 357 if (res != NULL) { 358 if (res == _young_list->last_survivor_region()) { 359 // We just claimed the last survivor so store NULL to indicate 360 // that we're done. 361 _next_survivor = NULL; 362 } else { 363 _next_survivor = res->get_next_young_region(); 364 } 365 } else { 366 // Someone else claimed the last survivor while we were trying 367 // to take the lock so nothing else to do. 368 } 369 } 370 assert(res == NULL || res->is_survivor(), "post-condition"); 371 372 return res; 373 } 374 375 void G1CMRootRegions::scan_finished() { 376 assert(scan_in_progress(), "pre-condition"); 377 378 // Currently, only survivors can be root regions. 
379 if (!_should_abort) { 380 assert(_next_survivor == NULL, "we should have claimed all survivors"); 381 } 382 _next_survivor = NULL; 383 384 { 385 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 386 _scan_in_progress = false; 387 RootRegionScan_lock->notify_all(); 388 } 389 } 390 391 bool G1CMRootRegions::wait_until_scan_finished() { 392 if (!scan_in_progress()) return false; 393 394 { 395 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 396 while (scan_in_progress()) { 397 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 398 } 399 } 400 return true; 401 } 402 403 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) { 404 return MAX2((n_par_threads + 2) / 4, 1U); 405 } 406 407 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) : 408 _g1h(g1h), 409 _markBitMap1(), 410 _markBitMap2(), 411 _parallel_marking_threads(0), 412 _max_parallel_marking_threads(0), 413 _sleep_factor(0.0), 414 _marking_task_overhead(1.0), 415 _cleanup_list("Cleanup List"), 416 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/), 417 _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >> 418 CardTableModRefBS::card_shift, 419 false /* in_resource_area*/), 420 421 _prevMarkBitMap(&_markBitMap1), 422 _nextMarkBitMap(&_markBitMap2), 423 424 _markStack(this), 425 // _finger set in set_non_marking_state 426 427 _max_worker_id(ParallelGCThreads), 428 // _active_tasks set in set_non_marking_state 429 // _tasks set inside the constructor 430 _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)), 431 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)), 432 433 _has_overflown(false), 434 _concurrent(false), 435 _has_aborted(false), 436 _restart_for_overflow(false), 437 _concurrent_marking_in_progress(false), 438 _concurrent_phase_started(false), 439 440 // 
_verbose_level set below 441 442 _init_times(), 443 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 444 _cleanup_times(), 445 _total_counting_time(0.0), 446 _total_rs_scrub_time(0.0), 447 448 _parallel_workers(NULL), 449 450 _count_card_bitmaps(NULL), 451 _count_marked_bytes(NULL), 452 _completed_initialization(false) { 453 454 _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage); 455 _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage); 456 457 // Create & start a ConcurrentMark thread. 458 _cmThread = new ConcurrentMarkThread(this); 459 assert(cmThread() != NULL, "CM Thread should have been created"); 460 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 461 if (_cmThread->osthread() == NULL) { 462 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread"); 463 } 464 465 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 466 assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency"); 467 assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency"); 468 469 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 470 satb_qs.set_buffer_size(G1SATBBufferSize); 471 472 _root_regions.init(_g1h, this); 473 474 if (ConcGCThreads > ParallelGCThreads) { 475 warning("Can't have more ConcGCThreads (%u) " 476 "than ParallelGCThreads (%u).", 477 ConcGCThreads, ParallelGCThreads); 478 return; 479 } 480 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) { 481 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent 482 // if both are set 483 _sleep_factor = 0.0; 484 _marking_task_overhead = 1.0; 485 } else if (G1MarkingOverheadPercent > 0) { 486 // We will calculate the number of parallel marking threads based 487 // on a target overhead with respect to the soft real-time goal 488 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 489 double overall_cm_overhead = 490 (double) MaxGCPauseMillis * 
marking_overhead / 491 (double) GCPauseIntervalMillis; 492 double cpu_ratio = 1.0 / (double) os::processor_count(); 493 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 494 double marking_task_overhead = 495 overall_cm_overhead / marking_thread_num * 496 (double) os::processor_count(); 497 double sleep_factor = 498 (1.0 - marking_task_overhead) / marking_task_overhead; 499 500 FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num); 501 _sleep_factor = sleep_factor; 502 _marking_task_overhead = marking_task_overhead; 503 } else { 504 // Calculate the number of parallel marking threads by scaling 505 // the number of parallel GC threads. 506 uint marking_thread_num = scale_parallel_threads(ParallelGCThreads); 507 FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num); 508 _sleep_factor = 0.0; 509 _marking_task_overhead = 1.0; 510 } 511 512 assert(ConcGCThreads > 0, "Should have been set"); 513 _parallel_marking_threads = ConcGCThreads; 514 _max_parallel_marking_threads = _parallel_marking_threads; 515 516 _parallel_workers = new WorkGang("G1 Marker", 517 _max_parallel_marking_threads, false, true); 518 if (_parallel_workers == NULL) { 519 vm_exit_during_initialization("Failed necessary allocation."); 520 } else { 521 _parallel_workers->initialize_workers(); 522 } 523 524 if (FLAG_IS_DEFAULT(MarkStackSize)) { 525 size_t mark_stack_size = 526 MIN2(MarkStackSizeMax, 527 MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE))); 528 // Verify that the calculated value for MarkStackSize is in range. 529 // It would be nice to use the private utility routine from Arguments. 
530 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { 531 warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): " 532 "must be between 1 and " SIZE_FORMAT, 533 mark_stack_size, MarkStackSizeMax); 534 return; 535 } 536 FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size); 537 } else { 538 // Verify MarkStackSize is in range. 539 if (FLAG_IS_CMDLINE(MarkStackSize)) { 540 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { 541 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 542 warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): " 543 "must be between 1 and " SIZE_FORMAT, 544 MarkStackSize, MarkStackSizeMax); 545 return; 546 } 547 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) { 548 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 549 warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")" 550 " or for MarkStackSizeMax (" SIZE_FORMAT ")", 551 MarkStackSize, MarkStackSizeMax); 552 return; 553 } 554 } 555 } 556 } 557 558 if (!_markStack.allocate(MarkStackSize)) { 559 warning("Failed to allocate CM marking stack"); 560 return; 561 } 562 563 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC); 564 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); 565 566 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC); 567 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC); 568 569 BitMap::idx_t card_bm_size = _card_bm.size(); 570 571 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 572 _active_tasks = _max_worker_id; 573 574 uint max_regions = _g1h->max_regions(); 575 for (uint i = 0; i < _max_worker_id; ++i) { 576 G1CMTaskQueue* task_queue = new G1CMTaskQueue(); 577 task_queue->initialize(); 578 _task_queues->register_queue(i, task_queue); 579 580 _count_card_bitmaps[i] = BitMap(card_bm_size, false); 581 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC); 582 583 _tasks[i] = new G1CMTask(i, 
this, 584 _count_marked_bytes[i], 585 &_count_card_bitmaps[i], 586 task_queue, _task_queues); 587 588 _accum_task_vtime[i] = 0.0; 589 } 590 591 // Calculate the card number for the bottom of the heap. Used 592 // in biasing indexes into the accounting card bitmaps. 593 _heap_bottom_card_num = 594 intptr_t(uintptr_t(_g1h->reserved_region().start()) >> 595 CardTableModRefBS::card_shift); 596 597 // Clear all the liveness counting data 598 clear_all_count_data(); 599 600 // so that the call below can read a sensible value 601 _heap_start = g1h->reserved_region().start(); 602 set_non_marking_state(); 603 _completed_initialization = true; 604 } 605 606 void G1ConcurrentMark::reset() { 607 // Starting values for these two. This should be called in a STW 608 // phase. 609 MemRegion reserved = _g1h->g1_reserved(); 610 _heap_start = reserved.start(); 611 _heap_end = reserved.end(); 612 613 // Separated the asserts so that we know which one fires. 614 assert(_heap_start != NULL, "heap bounds should look ok"); 615 assert(_heap_end != NULL, "heap bounds should look ok"); 616 assert(_heap_start < _heap_end, "heap bounds should look ok"); 617 618 // Reset all the marking data structures and any necessary flags 619 reset_marking_state(); 620 621 // We do reset all of them, since different phases will use 622 // different number of active threads. So, it's easiest to have all 623 // of them ready. 
624 for (uint i = 0; i < _max_worker_id; ++i) { 625 _tasks[i]->reset(_nextMarkBitMap); 626 } 627 628 // we need this to make sure that the flag is on during the evac 629 // pause with initial mark piggy-backed 630 set_concurrent_marking_in_progress(); 631 } 632 633 634 void G1ConcurrentMark::reset_marking_state(bool clear_overflow) { 635 _markStack.set_should_expand(); 636 _markStack.setEmpty(); // Also clears the _markStack overflow flag 637 if (clear_overflow) { 638 clear_has_overflown(); 639 } else { 640 assert(has_overflown(), "pre-condition"); 641 } 642 _finger = _heap_start; 643 644 for (uint i = 0; i < _max_worker_id; ++i) { 645 G1CMTaskQueue* queue = _task_queues->queue(i); 646 queue->set_empty(); 647 } 648 } 649 650 void G1ConcurrentMark::set_concurrency(uint active_tasks) { 651 assert(active_tasks <= _max_worker_id, "we should not have more"); 652 653 _active_tasks = active_tasks; 654 // Need to update the three data structures below according to the 655 // number of active threads for this phase. 656 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 657 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 658 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 659 } 660 661 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) { 662 set_concurrency(active_tasks); 663 664 _concurrent = concurrent; 665 // We propagate this to all tasks, not just the active ones. 666 for (uint i = 0; i < _max_worker_id; ++i) 667 _tasks[i]->set_concurrent(concurrent); 668 669 if (concurrent) { 670 set_concurrent_marking_in_progress(); 671 } else { 672 // We currently assume that the concurrent flag has been set to 673 // false before we start remark. At this point we should also be 674 // in a STW phase. 
675 assert(!concurrent_marking_in_progress(), "invariant"); 676 assert(out_of_regions(), 677 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT, 678 p2i(_finger), p2i(_heap_end)); 679 } 680 } 681 682 void G1ConcurrentMark::set_non_marking_state() { 683 // We set the global marking state to some default values when we're 684 // not doing marking. 685 reset_marking_state(); 686 _active_tasks = 0; 687 clear_concurrent_marking_in_progress(); 688 } 689 690 G1ConcurrentMark::~G1ConcurrentMark() { 691 // The G1ConcurrentMark instance is never freed. 692 ShouldNotReachHere(); 693 } 694 695 void G1ConcurrentMark::clearNextBitmap() { 696 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 697 698 // Make sure that the concurrent mark thread looks to still be in 699 // the current cycle. 700 guarantee(cmThread()->during_cycle(), "invariant"); 701 702 // We are finishing up the current cycle by clearing the next 703 // marking bitmap and getting it ready for the next cycle. During 704 // this time no other cycle can start. So, let's make sure that this 705 // is the case. 706 guarantee(!g1h->collector_state()->mark_in_progress(), "invariant"); 707 708 ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */); 709 ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true); 710 _parallel_workers->run_task(&task); 711 712 // Clear the liveness counting data. If the marking has been aborted, the abort() 713 // call already did that. 714 if (cl.complete()) { 715 clear_all_count_data(); 716 } 717 718 // Repeat the asserts from above. 
719 guarantee(cmThread()->during_cycle(), "invariant"); 720 guarantee(!g1h->collector_state()->mark_in_progress(), "invariant"); 721 } 722 723 class CheckBitmapClearHRClosure : public HeapRegionClosure { 724 G1CMBitMap* _bitmap; 725 bool _error; 726 public: 727 CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) { 728 } 729 730 virtual bool doHeapRegion(HeapRegion* r) { 731 // This closure can be called concurrently to the mutator, so we must make sure 732 // that the result of the getNextMarkedWordAddress() call is compared to the 733 // value passed to it as limit to detect any found bits. 734 // end never changes in G1. 735 HeapWord* end = r->end(); 736 return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end; 737 } 738 }; 739 740 bool G1ConcurrentMark::nextMarkBitmapIsClear() { 741 CheckBitmapClearHRClosure cl(_nextMarkBitMap); 742 _g1h->heap_region_iterate(&cl); 743 return cl.complete(); 744 } 745 746 class NoteStartOfMarkHRClosure: public HeapRegionClosure { 747 public: 748 bool doHeapRegion(HeapRegion* r) { 749 r->note_start_of_marking(); 750 return false; 751 } 752 }; 753 754 void G1ConcurrentMark::checkpointRootsInitialPre() { 755 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 756 G1CollectorPolicy* g1p = g1h->g1_policy(); 757 758 _has_aborted = false; 759 760 // Initialize marking structures. This has to be done in a STW phase. 761 reset(); 762 763 // For each region note start of marking. 764 NoteStartOfMarkHRClosure startcl; 765 g1h->heap_region_iterate(&startcl); 766 } 767 768 769 void G1ConcurrentMark::checkpointRootsInitialPost() { 770 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 771 772 // Start Concurrent Marking weak-reference discovery. 
773 ReferenceProcessor* rp = g1h->ref_processor_cm(); 774 // enable ("weak") refs discovery 775 rp->enable_discovery(); 776 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 777 778 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 779 // This is the start of the marking cycle, we're expected all 780 // threads to have SATB queues with active set to false. 781 satb_mq_set.set_active_all_threads(true, /* new active value */ 782 false /* expected_active */); 783 784 _root_regions.prepare_for_scan(); 785 786 // update_g1_committed() will be called at the end of an evac pause 787 // when marking is on. So, it's also called at the end of the 788 // initial-mark pause to update the heap end, if the heap expands 789 // during it. No need to call it here. 790 } 791 792 /* 793 * Notice that in the next two methods, we actually leave the STS 794 * during the barrier sync and join it immediately afterwards. If we 795 * do not do this, the following deadlock can occur: one thread could 796 * be in the barrier sync code, waiting for the other thread to also 797 * sync up, whereas another one could be trying to yield, while also 798 * waiting for the other threads to sync up too. 799 * 800 * Note, however, that this code is also used during remark and in 801 * this case we should not attempt to leave / enter the STS, otherwise 802 * we'll either hit an assert (debug / fastdebug) or deadlock 803 * (product). So we should only leave / enter the STS if we are 804 * operating concurrently. 805 * 806 * Because the thread that does the sync barrier has left the STS, it 807 * is possible to be suspended for a Full GC or an evacuation pause 808 * could occur. This is actually safe, since the entering the sync 809 * barrier is one of the last things do_marking_step() does, and it 810 * doesn't manipulate any data structures afterwards. 
811 */ 812 813 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 814 bool barrier_aborted; 815 { 816 SuspendibleThreadSetLeaver sts_leave(concurrent()); 817 barrier_aborted = !_first_overflow_barrier_sync.enter(); 818 } 819 820 // at this point everyone should have synced up and not be doing any 821 // more work 822 823 if (barrier_aborted) { 824 // If the barrier aborted we ignore the overflow condition and 825 // just abort the whole marking phase as quickly as possible. 826 return; 827 } 828 829 // If we're executing the concurrent phase of marking, reset the marking 830 // state; otherwise the marking state is reset after reference processing, 831 // during the remark pause. 832 // If we reset here as a result of an overflow during the remark we will 833 // see assertion failures from any subsequent set_concurrency_and_phase() 834 // calls. 835 if (concurrent()) { 836 // let the task associated with with worker 0 do this 837 if (worker_id == 0) { 838 // task 0 is responsible for clearing the global data structures 839 // We should be here because of an overflow. During STW we should 840 // not clear the overflow flag since we rely on it being true when 841 // we exit this method to abort the pause and restart concurrent 842 // marking. 
843 reset_marking_state(true /* clear_overflow */); 844 845 log_info(gc)("Concurrent Mark reset for overflow"); 846 } 847 } 848 849 // after this, each task should reset its own data structures then 850 // then go into the second barrier 851 } 852 853 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) { 854 SuspendibleThreadSetLeaver sts_leave(concurrent()); 855 _second_overflow_barrier_sync.enter(); 856 857 // at this point everything should be re-initialized and ready to go 858 } 859 860 class G1CMConcurrentMarkingTask: public AbstractGangTask { 861 private: 862 G1ConcurrentMark* _cm; 863 ConcurrentMarkThread* _cmt; 864 865 public: 866 void work(uint worker_id) { 867 assert(Thread::current()->is_ConcurrentGC_thread(), 868 "this should only be done by a conc GC thread"); 869 ResourceMark rm; 870 871 double start_vtime = os::elapsedVTime(); 872 873 { 874 SuspendibleThreadSetJoiner sts_join; 875 876 assert(worker_id < _cm->active_tasks(), "invariant"); 877 G1CMTask* the_task = _cm->task(worker_id); 878 the_task->record_start_time(); 879 if (!_cm->has_aborted()) { 880 do { 881 double start_vtime_sec = os::elapsedVTime(); 882 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 883 884 the_task->do_marking_step(mark_step_duration_ms, 885 true /* do_termination */, 886 false /* is_serial*/); 887 888 double end_vtime_sec = os::elapsedVTime(); 889 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 890 _cm->clear_has_overflown(); 891 892 _cm->do_yield_check(worker_id); 893 894 jlong sleep_time_ms; 895 if (!_cm->has_aborted() && the_task->has_aborted()) { 896 sleep_time_ms = 897 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 898 { 899 SuspendibleThreadSetLeaver sts_leave; 900 os::sleep(Thread::current(), sleep_time_ms, false); 901 } 902 } 903 } while (!_cm->has_aborted() && the_task->has_aborted()); 904 } 905 the_task->record_end_time(); 906 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 907 } 908 909 
double end_vtime = os::elapsedVTime(); 910 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime); 911 } 912 913 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm, 914 ConcurrentMarkThread* cmt) : 915 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 916 917 ~G1CMConcurrentMarkingTask() { } 918 }; 919 920 // Calculates the number of active workers for a concurrent 921 // phase. 922 uint G1ConcurrentMark::calc_parallel_marking_threads() { 923 uint n_conc_workers = 0; 924 if (!UseDynamicNumberOfGCThreads || 925 (!FLAG_IS_DEFAULT(ConcGCThreads) && 926 !ForceDynamicNumberOfGCThreads)) { 927 n_conc_workers = max_parallel_marking_threads(); 928 } else { 929 n_conc_workers = 930 AdaptiveSizePolicy::calc_default_active_workers( 931 max_parallel_marking_threads(), 932 1, /* Minimum workers */ 933 parallel_marking_threads(), 934 Threads::number_of_non_daemon_threads()); 935 // Don't scale down "n_conc_workers" by scale_parallel_threads() because 936 // that scaling has already gone into "_max_parallel_marking_threads". 937 } 938 assert(n_conc_workers > 0, "Always need at least 1"); 939 return n_conc_workers; 940 } 941 942 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) { 943 // Currently, only survivors can be root regions. 
944 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 945 G1RootRegionScanClosure cl(_g1h, this, worker_id); 946 947 const uintx interval = PrefetchScanIntervalInBytes; 948 HeapWord* curr = hr->bottom(); 949 const HeapWord* end = hr->top(); 950 while (curr < end) { 951 Prefetch::read(curr, interval); 952 oop obj = oop(curr); 953 int size = obj->oop_iterate_size(&cl); 954 assert(size == obj->size(), "sanity"); 955 curr += size; 956 } 957 } 958 959 class G1CMRootRegionScanTask : public AbstractGangTask { 960 private: 961 G1ConcurrentMark* _cm; 962 963 public: 964 G1CMRootRegionScanTask(G1ConcurrentMark* cm) : 965 AbstractGangTask("Root Region Scan"), _cm(cm) { } 966 967 void work(uint worker_id) { 968 assert(Thread::current()->is_ConcurrentGC_thread(), 969 "this should only be done by a conc GC thread"); 970 971 G1CMRootRegions* root_regions = _cm->root_regions(); 972 HeapRegion* hr = root_regions->claim_next(); 973 while (hr != NULL) { 974 _cm->scanRootRegion(hr, worker_id); 975 hr = root_regions->claim_next(); 976 } 977 } 978 }; 979 980 void G1ConcurrentMark::scanRootRegions() { 981 // Start of concurrent marking. 982 ClassLoaderDataGraph::clear_claimed_marks(); 983 984 // scan_in_progress() will have been set to true only if there was 985 // at least one root region to scan. So, if it's false, we 986 // should not attempt to do any further work. 
987 if (root_regions()->scan_in_progress()) { 988 GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan"); 989 990 _parallel_marking_threads = calc_parallel_marking_threads(); 991 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 992 "Maximum number of marking threads exceeded"); 993 uint active_workers = MAX2(1U, parallel_marking_threads()); 994 995 G1CMRootRegionScanTask task(this); 996 _parallel_workers->set_active_workers(active_workers); 997 _parallel_workers->run_task(&task); 998 999 // It's possible that has_aborted() is true here without actually 1000 // aborting the survivor scan earlier. This is OK as it's 1001 // mainly used for sanity checking. 1002 root_regions()->scan_finished(); 1003 } 1004 } 1005 1006 void G1ConcurrentMark::register_concurrent_phase_start(const char* title) { 1007 assert(!_concurrent_phase_started, "Sanity"); 1008 _concurrent_phase_started = true; 1009 _g1h->gc_timer_cm()->register_gc_concurrent_start(title); 1010 } 1011 1012 void G1ConcurrentMark::register_concurrent_phase_end() { 1013 if (_concurrent_phase_started) { 1014 _concurrent_phase_started = false; 1015 _g1h->gc_timer_cm()->register_gc_concurrent_end(); 1016 } 1017 } 1018 1019 void G1ConcurrentMark::markFromRoots() { 1020 // we might be tempted to assert that: 1021 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1022 // "inconsistent argument?"); 1023 // However that wouldn't be right, because it's possible that 1024 // a safepoint is indeed in progress as a younger generation 1025 // stop-the-world GC happens even as we mark in this generation. 
1026 1027 _restart_for_overflow = false; 1028 1029 // _g1h has _n_par_threads 1030 _parallel_marking_threads = calc_parallel_marking_threads(); 1031 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 1032 "Maximum number of marking threads exceeded"); 1033 1034 uint active_workers = MAX2(1U, parallel_marking_threads()); 1035 assert(active_workers > 0, "Should have been set"); 1036 1037 // Parallel task terminator is set in "set_concurrency_and_phase()" 1038 set_concurrency_and_phase(active_workers, true /* concurrent */); 1039 1040 G1CMConcurrentMarkingTask markingTask(this, cmThread()); 1041 _parallel_workers->set_active_workers(active_workers); 1042 _parallel_workers->run_task(&markingTask); 1043 print_stats(); 1044 } 1045 1046 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1047 // world is stopped at this checkpoint 1048 assert(SafepointSynchronize::is_at_safepoint(), 1049 "world should be stopped"); 1050 1051 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1052 1053 // If a full collection has happened, we shouldn't do this. 1054 if (has_aborted()) { 1055 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1056 return; 1057 } 1058 1059 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1060 1061 if (VerifyDuringGC) { 1062 HandleMark hm; // handle scope 1063 g1h->prepare_for_verify(); 1064 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1065 } 1066 g1h->verifier()->check_bitmaps("Remark Start"); 1067 1068 G1CollectorPolicy* g1p = g1h->g1_policy(); 1069 g1p->record_concurrent_mark_remark_start(); 1070 1071 double start = os::elapsedTime(); 1072 1073 checkpointRootsFinalWork(); 1074 1075 double mark_work_end = os::elapsedTime(); 1076 1077 weakRefsWork(clear_all_soft_refs); 1078 1079 if (has_overflown()) { 1080 // Oops. We overflowed. Restart concurrent marking. 
1081 _restart_for_overflow = true; 1082 log_develop_trace(gc)("Remark led to restart for overflow."); 1083 1084 // Verify the heap w.r.t. the previous marking bitmap. 1085 if (VerifyDuringGC) { 1086 HandleMark hm; // handle scope 1087 g1h->prepare_for_verify(); 1088 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)"); 1089 } 1090 1091 // Clear the marking state because we will be restarting 1092 // marking due to overflowing the global mark stack. 1093 reset_marking_state(); 1094 } else { 1095 { 1096 GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm()); 1097 1098 // Aggregate the per-task counting data that we have accumulated 1099 // while marking. 1100 aggregate_count_data(); 1101 } 1102 1103 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1104 // We're done with marking. 1105 // This is the end of the marking cycle, we're expected all 1106 // threads to have SATB queues with active set to true. 1107 satb_mq_set.set_active_all_threads(false, /* new active value */ 1108 true /* expected_active */); 1109 1110 if (VerifyDuringGC) { 1111 HandleMark hm; // handle scope 1112 g1h->prepare_for_verify(); 1113 Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)"); 1114 } 1115 g1h->verifier()->check_bitmaps("Remark End"); 1116 assert(!restart_for_overflow(), "sanity"); 1117 // Completely reset the marking state since marking completed 1118 set_non_marking_state(); 1119 } 1120 1121 // Expand the marking stack, if we have to and if we can. 
1122 if (_markStack.should_expand()) { 1123 _markStack.expand(); 1124 } 1125 1126 // Statistics 1127 double now = os::elapsedTime(); 1128 _remark_mark_times.add((mark_work_end - start) * 1000.0); 1129 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1130 _remark_times.add((now - start) * 1000.0); 1131 1132 g1p->record_concurrent_mark_remark_end(); 1133 1134 G1CMIsAliveClosure is_alive(g1h); 1135 g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive); 1136 } 1137 1138 // Base class of the closures that finalize and verify the 1139 // liveness counting data. 1140 class G1CMCountDataClosureBase: public HeapRegionClosure { 1141 protected: 1142 G1CollectedHeap* _g1h; 1143 G1ConcurrentMark* _cm; 1144 CardTableModRefBS* _ct_bs; 1145 1146 BitMap* _region_bm; 1147 BitMap* _card_bm; 1148 1149 // Takes a region that's not empty (i.e., it has at least one 1150 // live object in it and sets its corresponding bit on the region 1151 // bitmap to 1. 1152 void set_bit_for_region(HeapRegion* hr) { 1153 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index(); 1154 _region_bm->par_at_put(index, true); 1155 } 1156 1157 public: 1158 G1CMCountDataClosureBase(G1CollectedHeap* g1h, 1159 BitMap* region_bm, BitMap* card_bm): 1160 _g1h(g1h), _cm(g1h->concurrent_mark()), 1161 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())), 1162 _region_bm(region_bm), _card_bm(card_bm) { } 1163 }; 1164 1165 // Closure that calculates the # live objects per region. Used 1166 // for verification purposes during the cleanup pause. 
1167 class CalcLiveObjectsClosure: public G1CMCountDataClosureBase { 1168 G1CMBitMapRO* _bm; 1169 size_t _region_marked_bytes; 1170 1171 public: 1172 CalcLiveObjectsClosure(G1CMBitMapRO *bm, G1CollectedHeap* g1h, 1173 BitMap* region_bm, BitMap* card_bm) : 1174 G1CMCountDataClosureBase(g1h, region_bm, card_bm), 1175 _bm(bm), _region_marked_bytes(0) { } 1176 1177 bool doHeapRegion(HeapRegion* hr) { 1178 HeapWord* ntams = hr->next_top_at_mark_start(); 1179 HeapWord* start = hr->bottom(); 1180 1181 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(), 1182 "Preconditions not met - " 1183 "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT, 1184 p2i(start), p2i(ntams), p2i(hr->end())); 1185 1186 // Find the first marked object at or after "start". 1187 start = _bm->getNextMarkedWordAddress(start, ntams); 1188 1189 size_t marked_bytes = 0; 1190 1191 while (start < ntams) { 1192 oop obj = oop(start); 1193 int obj_sz = obj->size(); 1194 HeapWord* obj_end = start + obj_sz; 1195 1196 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 1197 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end); 1198 1199 // Note: if we're looking at the last region in heap - obj_end 1200 // could be actually just beyond the end of the heap; end_idx 1201 // will then correspond to a (non-existent) card that is also 1202 // just beyond the heap. 1203 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) { 1204 // end of object is not card aligned - increment to cover 1205 // all the cards spanned by the object 1206 end_idx += 1; 1207 } 1208 1209 // Set the bits in the card BM for the cards spanned by this object. 1210 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1211 1212 // Add the size of this object to the number of marked bytes. 1213 marked_bytes += (size_t)obj_sz * HeapWordSize; 1214 1215 // This will happen if we are handling a humongous object that spans 1216 // several heap regions. 
1217 if (obj_end > hr->end()) { 1218 break; 1219 } 1220 // Find the next marked object after this one. 1221 start = _bm->getNextMarkedWordAddress(obj_end, ntams); 1222 } 1223 1224 // Mark the allocated-since-marking portion... 1225 HeapWord* top = hr->top(); 1226 if (ntams < top) { 1227 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1228 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1229 1230 // Note: if we're looking at the last region in heap - top 1231 // could be actually just beyond the end of the heap; end_idx 1232 // will then correspond to a (non-existent) card that is also 1233 // just beyond the heap. 1234 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1235 // end of object is not card aligned - increment to cover 1236 // all the cards spanned by the object 1237 end_idx += 1; 1238 } 1239 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1240 1241 // This definitely means the region has live objects. 1242 set_bit_for_region(hr); 1243 } 1244 1245 // Update the live region bitmap. 1246 if (marked_bytes > 0) { 1247 set_bit_for_region(hr); 1248 } 1249 1250 // Set the marked bytes for the current region so that 1251 // it can be queried by a calling verification routine 1252 _region_marked_bytes = marked_bytes; 1253 1254 return false; 1255 } 1256 1257 size_t region_marked_bytes() const { return _region_marked_bytes; } 1258 }; 1259 1260 // Heap region closure used for verifying the counting data 1261 // that was accumulated concurrently and aggregated during 1262 // the remark pause. This closure is applied to the heap 1263 // regions during the STW cleanup pause. 
1264 1265 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure { 1266 G1CollectedHeap* _g1h; 1267 G1ConcurrentMark* _cm; 1268 CalcLiveObjectsClosure _calc_cl; 1269 BitMap* _region_bm; // Region BM to be verified 1270 BitMap* _card_bm; // Card BM to be verified 1271 1272 BitMap* _exp_region_bm; // Expected Region BM values 1273 BitMap* _exp_card_bm; // Expected card BM values 1274 1275 int _failures; 1276 1277 public: 1278 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h, 1279 BitMap* region_bm, 1280 BitMap* card_bm, 1281 BitMap* exp_region_bm, 1282 BitMap* exp_card_bm) : 1283 _g1h(g1h), _cm(g1h->concurrent_mark()), 1284 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm), 1285 _region_bm(region_bm), _card_bm(card_bm), 1286 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm), 1287 _failures(0) { } 1288 1289 int failures() const { return _failures; } 1290 1291 bool doHeapRegion(HeapRegion* hr) { 1292 int failures = 0; 1293 1294 // Call the CalcLiveObjectsClosure to walk the marking bitmap for 1295 // this region and set the corresponding bits in the expected region 1296 // and card bitmaps. 1297 bool res = _calc_cl.doHeapRegion(hr); 1298 assert(res == false, "should be continuing"); 1299 1300 // Verify the marked bytes for this region. 1301 size_t exp_marked_bytes = _calc_cl.region_marked_bytes(); 1302 size_t act_marked_bytes = hr->next_marked_bytes(); 1303 1304 if (exp_marked_bytes > act_marked_bytes) { 1305 if (hr->is_starts_humongous()) { 1306 // For start_humongous regions, the size of the whole object will be 1307 // in exp_marked_bytes. 
1308 HeapRegion* region = hr; 1309 int num_regions; 1310 for (num_regions = 0; region != NULL; num_regions++) { 1311 region = _g1h->next_region_in_humongous(region); 1312 } 1313 if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) { 1314 failures += 1; 1315 } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) { 1316 failures += 1; 1317 } 1318 } else { 1319 // We're not OK if expected marked bytes > actual marked bytes. It means 1320 // we have missed accounting some objects during the actual marking. 1321 failures += 1; 1322 } 1323 } 1324 1325 // Verify the bit, for this region, in the actual and expected 1326 // (which was just calculated) region bit maps. 1327 // We're not OK if the bit in the calculated expected region 1328 // bitmap is set and the bit in the actual region bitmap is not. 1329 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index(); 1330 1331 bool expected = _exp_region_bm->at(index); 1332 bool actual = _region_bm->at(index); 1333 if (expected && !actual) { 1334 failures += 1; 1335 } 1336 1337 // Verify that the card bit maps for the cards spanned by the current 1338 // region match. We have an error if we have a set bit in the expected 1339 // bit map and the corresponding bit in the actual bitmap is not set. 1340 1341 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1342 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1343 1344 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1345 expected = _exp_card_bm->at(i); 1346 actual = _card_bm->at(i); 1347 1348 if (expected && !actual) { 1349 failures += 1; 1350 } 1351 } 1352 1353 _failures += failures; 1354 1355 // We could stop iteration over the heap when we 1356 // find the first violating region by returning true. 
1357 return false; 1358 } 1359 }; 1360 1361 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1362 protected: 1363 G1CollectedHeap* _g1h; 1364 G1ConcurrentMark* _cm; 1365 BitMap* _actual_region_bm; 1366 BitMap* _actual_card_bm; 1367 1368 uint _n_workers; 1369 1370 BitMap* _expected_region_bm; 1371 BitMap* _expected_card_bm; 1372 1373 int _failures; 1374 1375 HeapRegionClaimer _hrclaimer; 1376 1377 public: 1378 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1379 BitMap* region_bm, BitMap* card_bm, 1380 BitMap* expected_region_bm, BitMap* expected_card_bm) 1381 : AbstractGangTask("G1 verify final counting"), 1382 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1383 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1384 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1385 _failures(0), 1386 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1387 assert(VerifyDuringGC, "don't call this otherwise"); 1388 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1389 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1390 } 1391 1392 void work(uint worker_id) { 1393 assert(worker_id < _n_workers, "invariant"); 1394 1395 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1396 _actual_region_bm, _actual_card_bm, 1397 _expected_region_bm, 1398 _expected_card_bm); 1399 1400 _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer); 1401 1402 Atomic::add(verify_cl.failures(), &_failures); 1403 } 1404 1405 int failures() const { return _failures; } 1406 }; 1407 1408 // Closure that finalizes the liveness counting data. 1409 // Used during the cleanup pause. 1410 // Sets the bits corresponding to the interval [NTAMS, top] 1411 // (which contains the implicitly live objects) in the 1412 // card liveness bitmap. Also sets the bit for each region, 1413 // containing live data, in the region liveness bitmap. 
1414 1415 class FinalCountDataUpdateClosure: public G1CMCountDataClosureBase { 1416 public: 1417 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1418 BitMap* region_bm, 1419 BitMap* card_bm) : 1420 G1CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1421 1422 bool doHeapRegion(HeapRegion* hr) { 1423 HeapWord* ntams = hr->next_top_at_mark_start(); 1424 HeapWord* top = hr->top(); 1425 1426 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1427 1428 // Mark the allocated-since-marking portion... 1429 if (ntams < top) { 1430 // This definitely means the region has live objects. 1431 set_bit_for_region(hr); 1432 1433 // Now set the bits in the card bitmap for [ntams, top) 1434 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1435 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1436 1437 // Note: if we're looking at the last region in heap - top 1438 // could be actually just beyond the end of the heap; end_idx 1439 // will then correspond to a (non-existent) card that is also 1440 // just beyond the heap. 
1441 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1442 // end of object is not card aligned - increment to cover 1443 // all the cards spanned by the object 1444 end_idx += 1; 1445 } 1446 1447 assert(end_idx <= _card_bm->size(), 1448 "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1449 end_idx, _card_bm->size()); 1450 assert(start_idx < _card_bm->size(), 1451 "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1452 start_idx, _card_bm->size()); 1453 1454 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1455 } 1456 1457 // Set the bit for the region if it contains live data 1458 if (hr->next_marked_bytes() > 0) { 1459 set_bit_for_region(hr); 1460 } 1461 1462 return false; 1463 } 1464 }; 1465 1466 class G1ParFinalCountTask: public AbstractGangTask { 1467 protected: 1468 G1CollectedHeap* _g1h; 1469 G1ConcurrentMark* _cm; 1470 BitMap* _actual_region_bm; 1471 BitMap* _actual_card_bm; 1472 1473 uint _n_workers; 1474 HeapRegionClaimer _hrclaimer; 1475 1476 public: 1477 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1478 : AbstractGangTask("G1 final counting"), 1479 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1480 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1481 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1482 } 1483 1484 void work(uint worker_id) { 1485 assert(worker_id < _n_workers, "invariant"); 1486 1487 FinalCountDataUpdateClosure final_update_cl(_g1h, 1488 _actual_region_bm, 1489 _actual_card_bm); 1490 1491 _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer); 1492 } 1493 }; 1494 1495 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1496 G1CollectedHeap* _g1; 1497 size_t _freed_bytes; 1498 FreeRegionList* _local_cleanup_list; 1499 uint _old_regions_removed; 1500 uint _humongous_regions_removed; 1501 HRRSCleanupTask* _hrrs_cleanup_task; 1502 1503 public: 1504 
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1505 FreeRegionList* local_cleanup_list, 1506 HRRSCleanupTask* hrrs_cleanup_task) : 1507 _g1(g1), 1508 _freed_bytes(0), 1509 _local_cleanup_list(local_cleanup_list), 1510 _old_regions_removed(0), 1511 _humongous_regions_removed(0), 1512 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1513 1514 size_t freed_bytes() { return _freed_bytes; } 1515 const uint old_regions_removed() { return _old_regions_removed; } 1516 const uint humongous_regions_removed() { return _humongous_regions_removed; } 1517 1518 bool doHeapRegion(HeapRegion *hr) { 1519 if (hr->is_archive()) { 1520 return false; 1521 } 1522 // We use a claim value of zero here because all regions 1523 // were claimed with value 1 in the FinalCount task. 1524 _g1->reset_gc_time_stamps(hr); 1525 hr->note_end_of_marking(); 1526 1527 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1528 _freed_bytes += hr->used(); 1529 hr->set_containing_set(NULL); 1530 if (hr->is_humongous()) { 1531 _humongous_regions_removed++; 1532 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1533 } else { 1534 _old_regions_removed++; 1535 _g1->free_region(hr, _local_cleanup_list, true); 1536 } 1537 } else { 1538 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1539 } 1540 1541 return false; 1542 } 1543 }; 1544 1545 class G1ParNoteEndTask: public AbstractGangTask { 1546 friend class G1NoteEndOfConcMarkClosure; 1547 1548 protected: 1549 G1CollectedHeap* _g1h; 1550 FreeRegionList* _cleanup_list; 1551 HeapRegionClaimer _hrclaimer; 1552 1553 public: 1554 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1555 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1556 } 1557 1558 void work(uint worker_id) { 1559 FreeRegionList local_cleanup_list("Local Cleanup List"); 1560 HRRSCleanupTask hrrs_cleanup_task; 1561 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1562 
&hrrs_cleanup_task); 1563 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1564 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1565 1566 // Now update the lists 1567 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1568 { 1569 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1570 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1571 1572 // If we iterate over the global cleanup list at the end of 1573 // cleanup to do this printing we will not guarantee to only 1574 // generate output for the newly-reclaimed regions (the list 1575 // might not be empty at the beginning of cleanup; we might 1576 // still be working on its previous contents). So we do the 1577 // printing here, before we append the new regions to the global 1578 // cleanup list. 1579 1580 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1581 if (hr_printer->is_active()) { 1582 FreeRegionListIterator iter(&local_cleanup_list); 1583 while (iter.more_available()) { 1584 HeapRegion* hr = iter.get_next(); 1585 hr_printer->cleanup(hr); 1586 } 1587 } 1588 1589 _cleanup_list->add_ordered(&local_cleanup_list); 1590 assert(local_cleanup_list.is_empty(), "post-condition"); 1591 1592 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1593 } 1594 } 1595 }; 1596 1597 void G1ConcurrentMark::cleanup() { 1598 // world is stopped at this checkpoint 1599 assert(SafepointSynchronize::is_at_safepoint(), 1600 "world should be stopped"); 1601 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1602 1603 // If a full collection has happened, we shouldn't do this. 
1604 if (has_aborted()) { 1605 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1606 return; 1607 } 1608 1609 g1h->verifier()->verify_region_sets_optional(); 1610 1611 if (VerifyDuringGC) { 1612 HandleMark hm; // handle scope 1613 g1h->prepare_for_verify(); 1614 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1615 } 1616 g1h->verifier()->check_bitmaps("Cleanup Start"); 1617 1618 G1CollectorPolicy* g1p = g1h->g1_policy(); 1619 g1p->record_concurrent_mark_cleanup_start(); 1620 1621 double start = os::elapsedTime(); 1622 1623 HeapRegionRemSet::reset_for_cleanup_tasks(); 1624 1625 // Do counting once more with the world stopped for good measure. 1626 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1627 1628 g1h->workers()->run_task(&g1_par_count_task); 1629 1630 if (VerifyDuringGC) { 1631 // Verify that the counting data accumulated during marking matches 1632 // that calculated by walking the marking bitmap. 1633 1634 // Bitmaps to hold expected values 1635 BitMap expected_region_bm(_region_bm.size(), true); 1636 BitMap expected_card_bm(_card_bm.size(), true); 1637 1638 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1639 &_region_bm, 1640 &_card_bm, 1641 &expected_region_bm, 1642 &expected_card_bm); 1643 1644 g1h->workers()->run_task(&g1_par_verify_task); 1645 1646 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1647 } 1648 1649 size_t start_used_bytes = g1h->used(); 1650 g1h->collector_state()->set_mark_in_progress(false); 1651 1652 double count_end = os::elapsedTime(); 1653 double this_final_counting_time = (count_end - start); 1654 _total_counting_time += this_final_counting_time; 1655 1656 if (log_is_enabled(Trace, gc, liveness)) { 1657 G1PrintRegionLivenessInfoClosure cl("Post-Marking"); 1658 _g1h->heap_region_iterate(&cl); 1659 } 1660 1661 // Install newly created mark bitMap as "prev". 
1662 swapMarkBitMaps(); 1663 1664 g1h->reset_gc_time_stamp(); 1665 1666 uint n_workers = _g1h->workers()->active_workers(); 1667 1668 // Note end of marking in all heap regions. 1669 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); 1670 g1h->workers()->run_task(&g1_par_note_end_task); 1671 g1h->check_gc_time_stamps(); 1672 1673 if (!cleanup_list_is_empty()) { 1674 // The cleanup list is not empty, so we'll have to process it 1675 // concurrently. Notify anyone else that might be wanting free 1676 // regions that there will be more free regions coming soon. 1677 g1h->set_free_regions_coming(); 1678 } 1679 1680 // call below, since it affects the metric by which we sort the heap 1681 // regions. 1682 if (G1ScrubRemSets) { 1683 double rs_scrub_start = os::elapsedTime(); 1684 g1h->scrub_rem_set(&_region_bm, &_card_bm); 1685 _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start); 1686 } 1687 1688 // this will also free any regions totally full of garbage objects, 1689 // and sort the regions. 1690 g1h->g1_policy()->record_concurrent_mark_cleanup_end(); 1691 1692 // Statistics. 1693 double end = os::elapsedTime(); 1694 _cleanup_times.add((end - start) * 1000.0); 1695 1696 // Clean up will have freed any regions completely full of garbage. 1697 // Update the soft reference policy with the new heap occupancy. 1698 Universe::update_heap_info_at_gc(); 1699 1700 if (VerifyDuringGC) { 1701 HandleMark hm; // handle scope 1702 g1h->prepare_for_verify(); 1703 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)"); 1704 } 1705 1706 g1h->verifier()->check_bitmaps("Cleanup End"); 1707 1708 g1h->verifier()->verify_region_sets_optional(); 1709 1710 // We need to make this be a "collection" so any collection pause that 1711 // races with it goes around and waits for completeCleanup to finish. 1712 g1h->increment_total_collections(); 1713 1714 // Clean out dead classes and update Metaspace sizes. 
1715 if (ClassUnloadingWithConcurrentMark) { 1716 ClassLoaderDataGraph::purge(); 1717 } 1718 MetaspaceGC::compute_new_size(); 1719 1720 // We reclaimed old regions so we should calculate the sizes to make 1721 // sure we update the old gen/space data. 1722 g1h->g1mm()->update_sizes(); 1723 g1h->allocation_context_stats().update_after_mark(); 1724 1725 g1h->trace_heap_after_concurrent_cycle(); 1726 } 1727 1728 void G1ConcurrentMark::completeCleanup() { 1729 if (has_aborted()) return; 1730 1731 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1732 1733 _cleanup_list.verify_optional(); 1734 FreeRegionList tmp_free_list("Tmp Free List"); 1735 1736 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : " 1737 "cleanup list has %u entries", 1738 _cleanup_list.length()); 1739 1740 // No one else should be accessing the _cleanup_list at this point, 1741 // so it is not necessary to take any locks 1742 while (!_cleanup_list.is_empty()) { 1743 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 1744 assert(hr != NULL, "Got NULL from a non-empty list"); 1745 hr->par_clear(); 1746 tmp_free_list.add_ordered(hr); 1747 1748 // Instead of adding one region at a time to the secondary_free_list, 1749 // we accumulate them in the local list and move them a few at a 1750 // time. This also cuts down on the number of notify_all() calls 1751 // we do during this process. We'll also append the local list when 1752 // _cleanup_list is empty (which means we just removed the last 1753 // region from the _cleanup_list). 
1754 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 1755 _cleanup_list.is_empty()) { 1756 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : " 1757 "appending %u entries to the secondary_free_list, " 1758 "cleanup list still has %u entries", 1759 tmp_free_list.length(), 1760 _cleanup_list.length()); 1761 1762 { 1763 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 1764 g1h->secondary_free_list_add(&tmp_free_list); 1765 SecondaryFreeList_lock->notify_all(); 1766 } 1767 #ifndef PRODUCT 1768 if (G1StressConcRegionFreeing) { 1769 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 1770 os::sleep(Thread::current(), (jlong) 1, false); 1771 } 1772 } 1773 #endif 1774 } 1775 } 1776 assert(tmp_free_list.is_empty(), "post-condition"); 1777 } 1778 1779 // Supporting Object and Oop closures for reference discovery 1780 // and processing in during marking 1781 1782 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1783 HeapWord* addr = (HeapWord*)obj; 1784 return addr != NULL && 1785 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 1786 } 1787 1788 // 'Keep Alive' oop closure used by both serial parallel reference processing. 1789 // Uses the G1CMTask associated with a worker thread (for serial reference 1790 // processing the G1CMTask for worker 0 is used) to preserve (mark) and 1791 // trace referent objects. 1792 // 1793 // Using the G1CMTask and embedded local queues avoids having the worker 1794 // threads operating on the global mark stack. This reduces the risk 1795 // of overflowing the stack - which we would rather avoid at this late 1796 // state. Also using the tasks' local queues removes the potential 1797 // of the workers interfering with each other that could occur if 1798 // operating on the global stack. 

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  int               _ref_counter_limit;  // drain interval (G1RefProcDrainInterval)
  int               _ref_counter;        // counts down to the next drain
  bool              _is_serial;
 public:
  // NOTE(review): the initializer list order differs from the declaration
  // order (_ref_counter_limit is declared before _is_serial but listed after
  // it). Members are still initialized in declaration order, which is
  // harmless here, but this triggers -Wreorder on gcc/clang.
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  // Marks/pushes the referenced object; every _ref_counter_limit references,
  // partially drains the marking data structures via do_marking_step().
  // Becomes a no-op once the global mark stack has overflown.
  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false      /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure: public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  bool              _is_serial;
 public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true         /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap*  _g1h;
  G1ConcurrentMark* _cm;
  WorkGang*         _workers;
  uint              _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          G1ConcurrentMark* cm,
                          WorkGang* workers,
                          uint n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

// Gang task that runs one reference-processing ProcessTask per worker,
// giving each worker its own keep-alive and drain closures backed by
// that worker's G1CMTask.
class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&      _proc_task;
  G1CollectedHeap*  _g1h;
  G1ConcurrentMark* _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       G1ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    ResourceMark rm;
    HandleMark hm;
    G1CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&proc_task_proxy);
}

// Gang task that runs one reference-enqueueing EnqueueTask per worker.
class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&enq_task_proxy);
}

// Parallel class/code unloading helper used by weakRefsWork() below.
// NOTE(review): the two 'true' literals presumably enable string and symbol
// table cleaning — confirm against G1CollectedHeap::parallel_cleaning().
void G1ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
  G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
}

// Processes the references discovered during concurrent marking and then
// unloads classes/strings/symbols/code. Skipped entirely if the global mark
// stack overflowed (marking will be restarted anyway).
void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  if (has_overflown()) {
    // Skip processing the discovered references if we have
    // overflown the global marking stack. Reference objects
    // only get discovered once so it is OK to not
    // de-populate the discovered reference lists. We could have,
    // but the only benefit would be that, when marking restarts,
    // less reference objects are discovered.
    return;
  }

  ResourceMark rm;
  HandleMark hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm());

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Set the soft reference policy
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    // Instances of the 'Keep Alive' and 'Complete GC' closures used
    // in serial reference processing. Note these closures are also
    // used for serially processing (by the current thread) the
    // JNI references during parallel reference processing.
    //
    // These closures do not need to synchronize with the worker
    // threads involved in parallel reference processing as these
    // instances are executed serially by the current thread (e.g.
    // reference processing is not multi-threaded and is thus
    // performed by the current thread instead of a gang worker).
    //
    // The gang tasks involved in parallel reference processing create
    // their own instances of these closures, which do their own
    // synchronization among themselves.
    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);

    // We need at least one active thread. If reference processing
    // is not multi-threaded we use the current (VMThread) thread,
    // otherwise we use the work gang from the G1CollectedHeap and
    // we utilize all the worker threads we can.
    bool processing_is_mt = rp->processing_is_mt();
    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);

    // Parallel processing task executor.
    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);
    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);

    // Set the concurrency level. The phase was already set prior to
    // executing the remark task.
    set_concurrency(active_workers);

    // Set the degree of MT processing here.  If the discovery was done MT,
    // the number of threads involved during discovery could differ from
    // the number of active workers.  This is OK as long as the discovered
    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);

    // Process the weak references.
    const ReferenceProcessorStats& stats =
        rp->process_discovered_references(&g1_is_alive,
                                          &g1_keep_alive,
                                          &g1_drain_mark_stack,
                                          executor,
                                          g1h->gc_timer_cm());
    g1h->gc_tracer_cm()->report_gc_reference_stats(stats);

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(_markStack.overflow() || _markStack.isEmpty(),
            "mark stack should be empty (unless it overflowed)");

    if (_markStack.overflow()) {
      // This should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    assert(rp->num_q() == active_workers, "why not");

    rp->enqueue_discovered_references(executor);

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  if (has_overflown()) {
    // We can not trust g1_is_alive if the marking stack overflowed
    return;
  }

  assert(_markStack.isEmpty(), "Marking should have completed");

  // Unload Klasses, String, Symbols, Code Cache, etc.
  {
    GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm());

    if (ClassUnloadingWithConcurrentMark) {
      bool purged_classes;

      {
        GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm());
        purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
      }

      {
        GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm());
        weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
      }
    }

    if (G1StringDedup::is_enabled()) {
      GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm());
      G1StringDedup::unlink(&g1_is_alive);
    }
  }
}

// Swaps the previous and next marking bitmaps (done at the end of a
// successful marking cycle so "next" becomes the authoritative "prev").
void G1ConcurrentMark::swapMarkBitMaps() {
  G1CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap    = (G1CMBitMap*)  temp;
}

// Closure for marking entries in SATB buffers.
class G1CMSATBBufferClosure : public SATBBufferClosure {
private:
  G1CMTask* _task;
  G1CollectedHeap* _g1h;

  // This is very similar to G1CMTask::deal_with_reference, but with
  // more relaxed requirements for the argument, so this must be more
  // circumspect about treating the argument as an object.
  void do_entry(void* entry) const {
    _task->increment_refs_reached();
    HeapRegion* hr = _g1h->heap_region_containing(entry);
    if (entry < hr->next_top_at_mark_start()) {
      // Until we get here, we don't know whether entry refers to a valid
      // object; it could instead have been a stale reference.
      oop obj = static_cast<oop>(entry);
      assert(obj->is_oop(true /* ignore mark word */),
             "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
      _task->make_reference_grey(obj, hr);
    }
  }

public:
  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};

// Thread closure used during remark: for each claimed thread, scans its
// nmethods for roots and drains its SATB buffer into the marking task.
class G1RemarkThreadsClosure : public ThreadClosure {
  G1CMSATBBufferClosure _cm_satb_cl;
  G1CMOopClosure _cm_cl;
  MarkingCodeBlobClosure _code_cl;
  int _thread_parity;

 public:
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
    _cm_satb_cl(task, g1h),
    _cm_cl(g1h, g1h->concurrent_mark(), task),
    _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
    _thread_parity(Threads::thread_claim_parity()) {}

  void do_thread(Thread* thread) {
    if (thread->is_Java_thread()) {
      if (thread->claim_oops_do(true, _thread_parity)) {
        JavaThread* jt = (JavaThread*)thread;

        // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking
        // however the liveness of oops reachable from nmethods have very complex lifecycles:
        // * Alive if on the stack of an executing method
        // * Weakly reachable otherwise
        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
        // live by the SATB invariant but other oops recorded in nmethods may behave differently.
        jt->nmethods_do(&_code_cl);

        jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
      }
    } else if (thread->is_VM_thread()) {
      if (thread->claim_oops_do(true, _thread_parity)) {
        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
      }
    }
  }
};

// Gang task that performs the remark work: per-thread SATB/nmethod scanning
// followed by a full (terminating) marking step.
class G1CMRemarkTask: public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if (worker_id < _cm->active_tasks()) {
      G1CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      {
        ResourceMark rm;
        HandleMark hm;

        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
        Threads::threads_do(&threads_f);
      }

      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true         /* do_termination       */,
                              false        /* is_serial            */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    AbstractGangTask("Par Remark"), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

// Performs the final (stop-the-world) remark work using all active workers
// and verifies that all SATB buffers have been drained (unless we overflowed).
void G1ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());

  g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at its
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the gang in the task.

  {
    StrongRootsScope srs(active_workers);

    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    g1h->workers()->run_task(&remarkTask);
  }

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %d",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}

void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((G1CMBitMap*)_prevMarkBitMap)->clearRange(mr);
}

// Claims the next unclaimed region starting at the global finger by
// CASing the finger forward. Returns NULL when either the heap is
// exhausted or the claimed region is empty (caller should retry).
HeapRegion*
G1ConcurrentMark::claim_region(uint worker_id) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    HeapRegion* curr_region = _g1h->heap_region_containing(finger);

    // Above heap_region_containing may return NULL as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger && curr_region != NULL) {
      // we succeeded
      HeapWord*   bottom        = curr_region->bottom();
      HeapWord*   limit         = curr_region->next_top_at_mark_start();

      // notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (limit > bottom) {
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

#ifndef PRODUCT
// Debug-only functor that checks an oop is valid and not in the
// collection set; used by verify_no_cset_oops() below.
class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
private:
  G1CollectedHeap* _g1h;
  const char* _phase;
  int _info;

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(oop obj) const {
    guarantee(obj->is_oop(),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(obj), _phase, _info);
    guarantee(!_g1h->obj_in_cs(obj),
              "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
              p2i(obj), _phase, _info);
  }
};

// Debug-only verification (at a safepoint, while marking is in progress)
// that no entries on the global mark stack, the task queues, or the
// global/per-task fingers refer into the collection set.
void G1ConcurrentMark::verify_no_cset_oops() {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
    return;
  }

  // Verify entries on the global mark stack
  _markStack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != NULL && global_finger < _heap_end) {
    // Since we always iterate over all regions, we might get a NULL HeapRegion
    // here.
    HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
    guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(parallel_marking_threads() <= _max_worker_id, "sanity");
  for (uint i = 0; i < parallel_marking_threads(); ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _heap_end) {
      // See above note on the global finger verification.
      HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
      guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
                !task_hr->in_collection_set(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
    }
  }
}
#endif // PRODUCT

// Aggregate the counting data that was constructed concurrently
// with marking.
// Heap region closure that folds each worker's per-region marked-bytes
// counts and per-worker card bitmaps into the region totals and the
// single global card bitmap.
class AggregateCountDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;
  BitMap* _cm_card_bm;
  uint _max_worker_id;

 public:
  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
                              BitMap* cm_card_bm,
                              uint max_worker_id) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* start = hr->bottom();
    HeapWord* limit = hr->next_top_at_mark_start();
    HeapWord* end = hr->end();

    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
           "top: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));

    assert(hr->next_marked_bytes() == 0, "Precondition");

    if (start == limit) {
      // NTAMS of this region has not been set so nothing to do.
      return false;
    }

    // 'start' should be in the heap.
    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump card bitmap index
    // for limit so that we get the all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could be actually just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
    uint hrm_index = hr->hrm_index();
    size_t marked_bytes = 0;

    for (uint i = 0; i < _max_worker_id; i += 1) {
      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

      // Fetch the marked_bytes in this region for task i and
      // add it to the running total for this region.
      marked_bytes += marked_bytes_array[hrm_index];

      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
      // into the global card bitmap.
      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);

      while (scan_idx < limit_idx) {
        assert(task_card_bm->at(scan_idx) == true, "should be");
        _cm_card_bm->set_bit(scan_idx);
        assert(_cm_card_bm->at(scan_idx) == true, "should be");

        // BitMap::get_next_one_offset() can handle the case when
        // its left_offset parameter is greater than its right_offset
        // parameter. It does, however, have an early exit if
        // left_offset == right_offset. So let's limit the value
        // passed in for left offset here.
        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
      }
    }

    // Update the marked bytes for this region.
    hr->add_to_marked_bytes(marked_bytes);

    // Next heap region
    return false;
  }
};

// Gang task that applies AggregateCountDataHRClosure to the heap regions,
// partitioned across workers via a HeapRegionClaimer.
class G1AggregateCountDataTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  BitMap* _cm_card_bm;
  uint _max_worker_id;
  uint _active_workers;
  HeapRegionClaimer _hrclaimer;

public:
  G1AggregateCountDataTask(G1CollectedHeap* g1h,
                           G1ConcurrentMark* cm,
                           BitMap* cm_card_bm,
                           uint max_worker_id,
                           uint n_workers) :
      AbstractGangTask("Count Aggregation"),
      _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
      _max_worker_id(max_worker_id),
      _active_workers(n_workers),
      _hrclaimer(_active_workers) {
  }

  void work(uint worker_id) {
    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);

    _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
  }
};


// Runs the count-aggregation task on the worker gang.
void G1ConcurrentMark::aggregate_count_data() {
  uint n_workers = _g1h->workers()->active_workers();

  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
                                           _max_worker_id, n_workers);

  _g1h->workers()->run_task(&g1_par_agg_task);
}

// Clear the per-worker arrays used to store the per-region counting data
void G1ConcurrentMark::clear_all_count_data() {
  // Clear the global card bitmap - it will be filled during
  // liveness count aggregation (during remark) and the
  // final counting task.
  _card_bm.clear();

  // Clear the global region bitmap - it will be filled as part
  // of the final counting task.
  _region_bm.clear();

  uint max_regions = _g1h->max_regions();
  assert(_max_worker_id > 0, "uninitialized");

  for (uint i = 0; i < _max_worker_id; i += 1) {
    BitMap* task_card_bm = count_card_bitmap_for(i);
    size_t* marked_bytes_array = count_marked_bytes_array_for(i);

    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
    assert(marked_bytes_array != NULL, "uninitialized");

    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
    task_card_bm->clear();
  }
}

// Logs per-task marking statistics at debug level (gc+stats).
void G1ConcurrentMark::print_stats() {
  if (!log_is_enabled(Debug, gc, stats)) {
    return;
  }
  log_debug(gc, stats)("---------------------------------------------------------------------");
  for (size_t i = 0; i < _active_tasks; ++i) {
    _tasks[i]->print_stats();
    log_debug(gc, stats)("---------------------------------------------------------------------");
  }
}

// abandon current marking iteration due to a Full GC
void G1ConcurrentMark::abort() {
  if (!cmThread()->during_cycle() || _has_aborted) {
    // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
    return;
  }

  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
  // concurrent bitmap clearing.
  _nextMarkBitMap->clearAll();

  // Note we cannot clear the previous marking bitmap here
  // since VerifyDuringGC verifies the objects marked during
  // a full GC against the previous bitmap.

  // Clear the liveness counting data
  clear_all_count_data();
  // Empty mark stack
  reset_marking_state();
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->clear_region_fields();
  }
  // Abort the overflow barriers so that any workers parked on them
  // do not wait for peers that will never arrive.
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);

  _g1h->trace_heap_after_concurrent_cycle();

  // Close any open concurrent phase timing
  register_concurrent_phase_end();

  _g1h->register_concurrent_cycle_end();
}

// Logs summary statistics (total/avg, and std-dev/max when available)
// for one NumberSeq of millisecond timings.
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

// Logs a trace-level summary of all concurrent marking phase timings.
void G1ConcurrentMark::print_summary_info() {
  LogHandle(gc, marking) log;
  if (!log.is_trace()) {
    return;
  }

  log.trace(" Concurrent marking:");
  print_ms_time_info(" ", "init marks", _init_times);
  print_ms_time_info(" ", "remarks", _remark_times);
  {
    print_ms_time_info(" ", "final marks", _remark_mark_times);
    print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info(" ", "cleanups", _cleanup_times);
  log.trace(" Final counting total time = %8.2f s (avg = %8.2f ms).",
            _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
  if (G1ScrubRemSets) {
    log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
              _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
  }
  log.trace(" Total stop_world time = %8.2f s.",
            (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
  log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
            cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
}

void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}

void G1ConcurrentMark::print_on_error(outputStream* st) const {
  st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
               p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
  _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
  _nextMarkBitMap->print_on_error(st, " Next Bits: ");
}

// We take a break if someone is trying to stop the world.
// Returns true if we yielded (worker 0 additionally records the
// concurrent pause with the policy before yielding), false if no
// stop-the-world request was pending.
bool G1ConcurrentMark::do_yield_check(uint worker_id) {
  if (SuspendibleThreadSet::should_yield()) {
    if (worker_id == 0) {
      _g1h->g1_policy()->record_concurrent_pause();
    }
    SuspendibleThreadSet::yield();
    return true;
  } else {
    return false;
  }
}

// Closure for iteration over bitmaps: invoked for every marked bit while a
// task scans (part of) a region's next-mark bitmap.
class G1CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  G1CMBitMap*       _nextMarkBitMap;
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;

public:
  G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
    _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  // Scans the object whose mark corresponds to the given bit offset and
  // partially drains the task's queues. Returning false stops the
  // enclosing bitmap iteration (we do so once the task has aborted).
  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    // The marked address must lie between the task's local finger
    // (inclusive) and the global finger (exclusive).
    assert( addr < _cm->finger(), "invariant");
    assert(addr >= _task->finger(), "invariant");

    // We move that task's local finger along.
    _task->move_finger_to(addr);

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

// Returns the concurrent-marking reference processor when
// G1UseConcMarkReferenceProcessing is enabled, NULL otherwise.
static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
  ReferenceProcessor* result = NULL;
  if (G1UseConcMarkReferenceProcessing) {
    result = g1h->ref_processor_cm();
    assert(result != NULL, "should not be NULL");
  }
  return result;
}

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1ConcurrentMark* cm,
                               G1CMTask* task)
  : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _cm(cm), _task(task)
{ }

// Start working on hr: record it as the current region, point the local
// finger at its bottom and compute the scan limit.
void G1CMTask::setup_for_region(HeapRegion* hr) {
  assert(hr != NULL,
         "claim_region() should have filtered out NULL regions");
  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}

// Re-read the current region's next-top-at-mark-start (NTAMS) and update
// _region_limit (and possibly _finger) accordingly. This copes with the
// region having been emptied and/or re-used by an evacuation pause since
// we last looked at it.
void G1CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

// Stop working on the current region; asserts we actually hold one.
void G1CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  clear_region_fields();
}

void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region = NULL;
  _finger = NULL;
  _region_limit = NULL;
}

// Install (non-NULL) or clear (NULL) the oop closure used while scanning.
// The asserts enforce that installs and clears strictly alternate.
void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

// Re-initialize this task against the given next-mark bitmap, clearing
// the region fields and the per-step statistics counters.
void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");
  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;
  _termination_start_time_ms = 0.0;
}

bool G1CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

// Called when one of the work-based limits has been hit; forwards to the
// clock so the abort conditions are re-checked.
void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

// The "clock" of a marking task: invoked at (work-based) regular
// intervals, it checks every condition that should make the current
// do_marking_step() invocation abort, and raises the task's abort flag
// when one of them holds.
void G1CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    return;
  }

  // Note: read the time before the yield check so the elapsed-time test
  // below does not include any time spent handling the yield request.
  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    return;
  }

  // (4) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    return;
  }

  // (5) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    return;
  }
}

// Reset both work-based limits to one full period beyond the current
// progress counters.
void G1CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit = _real_words_scanned_limit;

  _real_refs_reached_limit = _refs_reached + refs_reached_period;
  _refs_reached_limit = _real_refs_reached_limit;
}

void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.
  // Pulling the limits back by 3/4 of a period makes the clock fire
  // after only a quarter of the usual amount of work.
  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

// Move up to global_stack_transfer_size entries from the local task queue
// to the global mark stack; aborts the task if the global push fails
// (i.e. the global stack has overflown).
void G1CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    if (!_cm->mark_stack_push(buffer, n)) {
      set_has_aborted();
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

// Counterpart of move_entries_to_global_stack(): pop a chunk of entries
// from the global mark stack and push them onto the local task queue.
void G1CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

// Pop and scan entries from the local task queue until its size is at or
// below the target: a third of the queue's capacity (capped by
// GCDrainStackTargetSize) when draining partially, zero when draining
// totally. Stops early once the task has aborted.
void G1CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }
  }
}

// Counterpart of drain_local_queue() for the global mark stack: pull
// chunks from the global stack into the (already drained) local queue and
// drain them, until the global stack is at or below the target size.
void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }
  }
}

// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    regular_clock_call();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

// Log the per-task marking statistics at debug level.
void G1CMTask::print_stats() {
  log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
                       _worker_id, _calls);
  log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                       _elapsed_time_ms, _termination_time_ms);
  log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                       _step_times_ms.num(), _step_times_ms.avg(),
                       _step_times_ms.sd());
  log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms",
                       _step_times_ms.maximum(), _step_times_ms.sum());
}

// Attempt to steal one entry from another task's queue; on success
// returns true and stores the stolen entry in obj.
bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
  return _task_queues->steal(worker_id, hash_seed, obj);
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses too, since do_marking_step() ensures that it aborts before
    it needs to yield.
    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it needs to be also
      pushed on a stack).

      (2) Local Queue. The local queue of the task which is accessed
      reasonably efficiently by the task. Other tasks can steal from
      it when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work does a task totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and more fine-grained
      interaction with it which might cause contention. If it
      overflows, then the marking phase should restart and iterate
      over the bitmap to identify gray objects. Throughout the marking
      phase, tasks attempt to keep the global mark stack at a small
      length but not totally empty, so that entries are available for
      popping by other tasks. Only when there is no more work do
      tasks totally drain the global mark stack.

      (4) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long. During remark, all completed buffers are processed, as
      well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (on the global stack) has been
      triggered. Before the task aborts, it will actually sync up with
      the other tasks to ensure that all the marking data structures
      (local queues, stacks, fingers etc.) are re-initialized so that
      when do_marking_step() completes, the marking phase can
      immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. When we have to yield, we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding, we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-millisecond intervals) throughout marking. It is this clock
    method that checks all the abort conditions which were mentioned
    above and decides when the task should abort. A work-based scheme
    is used to trigger this clock method: when the number of object
    words the marking phase has scanned or the number of references
    the marking phase has visited reaches a given limit. Additional
    invocations to the clock method have been planted in a few other
    strategic places too. The initial reason for the clock method was
    to avoid calling vtime too regularly, as it is quite expensive. So,
    once it was in place, it was natural to piggy-back all the other
    conditions on it too and not constantly check them throughout
    the code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.
 *****************************************************************************/

// The marking step driver; see the extensive comment block above for the
// overall contract.
//   time_target_ms - soft time budget for this step; once exceeded the
//                    regular clock raises the abort flag.
//   do_termination - enter the termination protocol when out of work
//                    (stealing is enabled iff do_termination && !is_serial).
//   is_serial      - true when called by a single thread (e.g. the
//                    VMThread); skips barrier/termination synchronization.
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  // NOTE(review): g1_policy appears unused in this method (the predictor
  // below is fetched via _g1h->g1_policy() again) - candidate for removal.
  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard again all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shrink the time target by the predicted overshoot so that, on
  // average, we finish close to the requested target.
  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region == NULL, "invariant");
      assert(_finger == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");
    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");
    while (!has_aborted()) {
      oop obj;
      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The G1CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task. It
      // will restart it and we can hopefully find more things to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're during the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }

  _claimed = false;
}

// marked_bytes and card_bm are stored as the per-task liveness counting
// structures (_marked_bytes_array / _card_bm).
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   size_t* marked_bytes,
                   BitMap* card_bm,
                   G1CMTaskQueue* task_queue,
                   G1CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  // Seed the step-overshoot predictor with a small non-zero value.
  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

// Address range: value format and its header format (header width depends
// on pointer size, hence the _LP64 split).
#define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

// Closure that prints a per-region liveness table for the given phase.
// The constructor zeroes all running totals and the _hum_* scratch fields
// used to apportion "starts humongous" statistics across the regions of a
// humongous series, then emits the table header.
G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(const char* phase_name)
  : _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  // Phase tag line, heap extent, then two header rows: column names
  // followed by their units.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "(bytes)");
}

// It takes as a parameter a reference to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    // Hand out at most one region's worth per call; the remainder stays
    // in *hum_bytes for the following regions of the series.
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly.
// It assumes that the _hum_* fields have already been set up from the
// "starts humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

// Called once per heap region: accumulates the per-region statistics into
// the running totals and prints one table row for the region. Humongous
// series are special-cased so the stats of the "starts humongous" region
// are spread one-region's-worth at a time across the whole series.
bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = r->get_type_str();
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  size_t remset_bytes = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    // Stash the series-wide stats, then immediately draw this (first)
    // region's share back out of them.
    _hum_capacity_bytes = capacity_bytes;
    _hum_used_bytes = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    // Report only this region's extent, not the whole humongous object.
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes += used_bytes;
  _total_capacity_bytes += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, strong_code_roots_bytes);

  // false means "do not terminate the iteration" — presumably the
  // HeapRegionClosure convention; confirm against its declaration.
  return false;
}

// Prints the summary footer: totals in MB, with used/prev-live/next-live
// also expressed as a percentage of total capacity.
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usages to remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          perc(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_prev_live_bytes),
                          perc(_total_prev_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_next_live_bytes),
                          perc(_total_next_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_strong_code_roots_bytes));
}