/*
 * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
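  // Each bit in the map covers (1 << _shifter) heap words (one bit per
  // possible object start at the minimum object alignment), so we align
  // addr to that granularity before translating it into a bit offset.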
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  assert(limit != NULL, "limit must not be NULL");
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

#ifndef PRODUCT
bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize == heap_rs.word_size();
}
#endif

void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t G1CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t G1CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clear_range(mr);
}

void G1CMBitMap::clear_range(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

G1CMMarkStack::G1CMMarkStack(G1ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool G1CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of G1ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void G1CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
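  // The capacity is at most doubled on each call: new_capacity below is
  // MIN2(_capacity * 2, MarkStackSizeMax), so e.g. a stack that overflowed
  // at 512K entries is retried with 1M entries, until the cap is reached.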
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up the existing stack until we have managed to
  // get the doubled capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with the old stack
    _virtual_space.release();
    // Reinitialize the virtual space for the new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    // Failed to double capacity, continue.
    log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
                  _capacity / K, new_capacity / K);
  }
}

void G1CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void G1CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool G1CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void G1CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void G1CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue, so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

G1CMRootRegions::G1CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void G1CMRootRegions::init(G1CollectedHeap* g1h, G1ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
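  // If the scan was aborted, claim_next() returns NULL without draining
  // the survivor list, so _next_survivor may legitimately be non-NULL
  // here; only a clean (non-aborted) finish guarantees that every
  // survivor region was claimed.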
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _concurrent_phase_status(ConcPhaseNotStarted),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
      (double) os::processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor          = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
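    // Only values supplied on the command line are checked here; the
    // ergonomically computed default above was already verified against
    // [1, MarkStackSizeMax] before being set.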
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new G1CMTask(i, this,
                             _count_marked_bytes[i],
                             &_count_card_bitmaps[i],
                             task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  // Starting values for these two. This should be called in an STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // We need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed.
  set_concurrent_marking_in_progress();
}


void G1ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in an STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void G1ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

G1ConcurrentMark::~G1ConcurrentMark() {
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

class G1ClearBitMapTask : public AbstractGangTask {
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) {
    }

    virtual bool doHeapRegion(HeapRegion* r) {
      size_t const chunk_size_in_words = M / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product.
        // However, we will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("Parallel Clear Bitmap Task"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer, true);
  }

  bool is_complete() {
    return _cl.complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  G1ClearBitMapTask task(bitmap, this, workers->active_workers(), may_yield);
  workers->run_task(&task);
  guarantee(!may_yield || task.is_complete(), "Must have completed iteration when not yielding.");
}

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread appears to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");

  clear_bitmap(_nextMarkBitMap, _parallel_workers, true);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (!has_aborted()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
  clear_bitmap((G1CMBitMap*)_prevMarkBitMap, workers, false);
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
  bool _error;
public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
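    // That is, the region's bitmap range is clear iff the first marked
    // address found at or above bottom() is the limit (end()) itself.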
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in an STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void G1ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that it will be suspended for a Full GC or that an
 * evacuation pause could occur. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // Task 0 is responsible for clearing the global data structures.
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag, since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);

      log_info(gc)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class G1CMConcurrentMarkingTask: public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      G1CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
                            ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~G1CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
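// If the number of concurrent workers is fixed (dynamic sizing disabled,
// or ConcGCThreads set explicitly without forcing dynamic sizing), all of
// max_parallel_marking_threads() are used; otherwise the count adapts to
// the current number of non-daemon Java threads via AdaptiveSizePolicy.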
uint G1ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void G1ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;

public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void G1ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
    GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    G1CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::register_concurrent_phase_start(const char* title) {
  uint old_val = 0;
  do {
    old_val = Atomic::cmpxchg(ConcPhaseStarted, &_concurrent_phase_status, ConcPhaseNotStarted);
  } while (old_val != ConcPhaseNotStarted);
  _g1h->gc_timer_cm()->register_gc_concurrent_start(title);
}

void G1ConcurrentMark::register_concurrent_phase_end_common(bool end_timer) {
  if (_concurrent_phase_status == ConcPhaseNotStarted) {
    return;
  }

  uint old_val = Atomic::cmpxchg(ConcPhaseStopping, &_concurrent_phase_status, ConcPhaseStarted);
  if (old_val == ConcPhaseStarted) {
    _g1h->gc_timer_cm()->register_gc_concurrent_end();
    // If 'end_timer' is true, we came here to end the timer, which requires
    // the concurrent phase to have ended. We need to end it before changing
    // the status to 'ConcPhaseNotStarted', to prevent 'ConcurrentMarkThread'
    // from starting a new concurrent phase.
    if (end_timer) {
      _g1h->gc_timer_cm()->register_gc_end();
    }
    old_val = Atomic::cmpxchg(ConcPhaseNotStarted, &_concurrent_phase_status, ConcPhaseStopping);
    assert(old_val == ConcPhaseStopping, "Should not have changed since we entered this scope.");
  } else {
    do {
      // Let the other thread finish changing '_concurrent_phase_status' to 'ConcPhaseNotStarted'.
      os::naked_short_sleep(1);
    } while (_concurrent_phase_status != ConcPhaseNotStarted);
  }
}

void G1ConcurrentMark::register_concurrent_phase_end() {
  register_concurrent_phase_end_common(false);
}

void G1ConcurrentMark::register_concurrent_gc_end_and_stop_timer() {
  register_concurrent_phase_end_common(true);
}

void G1ConcurrentMark::markFromRoots() {
  // We might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    log_develop_trace(gc)("Remark led to restart for overflow.");

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      GCTraceTime(Debug, gc) trace("Aggregate Data", g1h->gc_timer_cm());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class G1CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  void set_bit_for_region(HeapRegion* hr) {
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    _region_bm->par_at_put(index, true);
  }

public:
  G1CMCountDataClosureBase(G1CollectedHeap* g1h,
                           BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public G1CMCountDataClosureBase {
  G1CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(G1CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    G1CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in the heap, obj_end
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // This will happen if we are handling a humongous object that spans
      // several heap regions.
      if (obj_end > hr->end()) {
        break;
      }
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;     // Region BM to be verified
  BitMap* _card_bm;       // Card BM to be verified

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    if (exp_marked_bytes > act_marked_bytes) {
      if (hr->is_starts_humongous()) {
        // For start_humongous regions, the size of the whole object will be
        // in exp_marked_bytes.
        HeapRegion* region = hr;
        int num_regions;
        for (num_regions = 0; region != NULL; num_regions++) {
          region = _g1h->next_region_in_humongous(region);
        }
        if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
          failures += 1;
        } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
          failures += 1;
        }
      } else {
        // We're not OK if expected marked bytes > actual marked bytes. It means
        // we have missed accounting some objects during the actual marking.
        failures += 1;
      }
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        failures += 1;
      }
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int _failures;

  HeapRegionClaimer _hrclaimer;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit, in the region
// liveness bitmap, for each region containing live data.

class FinalCountDataUpdateClosure: public G1CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    G1CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             end_idx, _card_bm->size());
      assert(start_idx < _card_bm->size(),
             "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
             start_idx, _card_bm->size());

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
  }
};

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const uint old_regions_removed() { return _old_regions_removed; }
  const uint humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
      AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing, we will not be guaranteed to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

void G1ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
1639 if (has_aborted()) {
1640 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1641 return;
1642 }
1643
1644 g1h->verifier()->verify_region_sets_optional();
1645
1646 if (VerifyDuringGC) {
1647 HandleMark hm; // handle scope
1648 g1h->prepare_for_verify();
1649 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1650 }
1651 g1h->verifier()->check_bitmaps("Cleanup Start");
1652
1653 G1CollectorPolicy* g1p = g1h->g1_policy();
1654 g1p->record_concurrent_mark_cleanup_start();
1655
1656 double start = os::elapsedTime();
1657
1658 HeapRegionRemSet::reset_for_cleanup_tasks();
1659
1660 // Do counting once more with the world stopped for good measure.
1661 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1662
1663 g1h->workers()->run_task(&g1_par_count_task);
1664
1665 if (VerifyDuringGC) {
1666 // Verify that the counting data accumulated during marking matches
1667 // that calculated by walking the marking bitmap.
1668
1669 // Bitmaps to hold expected values
1670 BitMap expected_region_bm(_region_bm.size(), true);
1671 BitMap expected_card_bm(_card_bm.size(), true);
1672
1673 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1674 &_region_bm,
1675 &_card_bm,
1676 &expected_region_bm,
1677 &expected_card_bm);
1678
1679 g1h->workers()->run_task(&g1_par_verify_task);
1680
1681 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1682 }
1683
1684 size_t start_used_bytes = g1h->used();
1685 g1h->collector_state()->set_mark_in_progress(false);
1686
1687 double count_end = os::elapsedTime();
1688 double this_final_counting_time = (count_end - start);
1689 _total_counting_time += this_final_counting_time;
1690
1691 if (log_is_enabled(Trace, gc, liveness)) {
1692 G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1693 _g1h->heap_region_iterate(&cl);
1694 }
1695
1696 // Install newly created mark bitmap as "prev".
1697 swapMarkBitMaps();
1698
1699 g1h->reset_gc_time_stamp();
1700
1701 uint n_workers = _g1h->workers()->active_workers();
1702
1703 // Note end of marking in all heap regions.
1704 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1705 g1h->workers()->run_task(&g1_par_note_end_task);
1706 g1h->check_gc_time_stamps();
1707
1708 if (!cleanup_list_is_empty()) {
1709 // The cleanup list is not empty, so we'll have to process it
1710 // concurrently. Notify anyone else that might be wanting free
1711 // regions that there will be more free regions coming soon.
1712 g1h->set_free_regions_coming();
1713 }
1714
1715 // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1716 // call below, since it affects the metric by which we sort the heap regions.
1717 if (G1ScrubRemSets) {
1718 double rs_scrub_start = os::elapsedTime();
1719 g1h->scrub_rem_set(&_region_bm, &_card_bm);
1720 _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1721 }
1722
1723 // this will also free any regions totally full of garbage objects,
1724 // and sort the regions.
1725 g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1726
1727 // Statistics.
1728 double end = os::elapsedTime();
1729 _cleanup_times.add((end - start) * 1000.0);
1730
1731 // Clean up will have freed any regions completely full of garbage.
1732 // Update the soft reference policy with the new heap occupancy.
1733 Universe::update_heap_info_at_gc();
1734
1735 if (VerifyDuringGC) {
1736 HandleMark hm; // handle scope
1737 g1h->prepare_for_verify();
1738 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1739 }
1740
1741 g1h->verifier()->check_bitmaps("Cleanup End");
1742
1743 g1h->verifier()->verify_region_sets_optional();
1744
1745 // We need to make this a "collection" so any collection pause that
1746 // races with it goes around and waits for completeCleanup to finish.
1747 g1h->increment_total_collections();
1748
1749 // Clean out dead classes and update Metaspace sizes.
1750 if (ClassUnloadingWithConcurrentMark) {
1751 ClassLoaderDataGraph::purge();
1752 }
1753 MetaspaceGC::compute_new_size();
1754
1755 // We reclaimed old regions so we should calculate the sizes to make
1756 // sure we update the old gen/space data.
1757 g1h->g1mm()->update_sizes();
1758 g1h->allocation_context_stats().update_after_mark();
1759
1760 g1h->trace_heap_after_concurrent_cycle();
1761 }
1762
1763 void G1ConcurrentMark::completeCleanup() {
1764 if (has_aborted()) return;
1765
1766 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1767
1768 _cleanup_list.verify_optional();
1769 FreeRegionList tmp_free_list("Tmp Free List");
1770
1771 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1772 "cleanup list has %u entries",
1773 _cleanup_list.length());
1774
1775 // No one else should be accessing the _cleanup_list at this point,
1776 // so it is not necessary to take any locks
1777 while (!_cleanup_list.is_empty()) {
1778 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1779 assert(hr != NULL, "Got NULL from a non-empty list");
1780 hr->par_clear();
1781 tmp_free_list.add_ordered(hr);
1782
1783 // Instead of adding one region at a time to the secondary_free_list,
1784 // we accumulate them in the local list and move them a few at a
1785 // time. This also cuts down on the number of notify_all() calls
1786 // we do during this process. We'll also append the local list when
1787 // _cleanup_list is empty (which means we just removed the last
1788 // region from the _cleanup_list).
1789 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1790 _cleanup_list.is_empty()) {
1791 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1792 "appending %u entries to the secondary_free_list, "
1793 "cleanup list still has %u entries",
1794 tmp_free_list.length(),
1795 _cleanup_list.length());
1796
1797 {
1798 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1799 g1h->secondary_free_list_add(&tmp_free_list);
1800 SecondaryFreeList_lock->notify_all();
1801 }
1802 #ifndef PRODUCT
1803 if (G1StressConcRegionFreeing) {
1804 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1805 os::sleep(Thread::current(), (jlong) 1, false);
1806 }
1807 }
1808 #endif
1809 }
1810 }
1811 assert(tmp_free_list.is_empty(), "post-condition");
1812 }
1813
1814 // Supporting Object and Oop closures for reference discovery
1815 // and processing during marking
1816
1817 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1818 HeapWord* addr = (HeapWord*)obj;
1819 return addr != NULL &&
1820 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1821 }
1822
1823 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1824 // Uses the G1CMTask associated with a worker thread (for serial reference
1825 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1826 // trace referent objects.
1827 //
1828 // Using the G1CMTask and embedded local queues avoids having the worker
1829 // threads operating on the global mark stack. This reduces the risk
1830 // of overflowing the stack - which we would rather avoid at this late
1831 // stage. Using the tasks' local queues also removes the potential
1832 // for the workers to interfere with each other, which could occur if
1833 // they operated on the global stack.
1834
1835 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1836 G1ConcurrentMark* _cm;
1837 G1CMTask* _task;
1838 int _ref_counter_limit;
1839 int _ref_counter;
1840 bool _is_serial;
1841 public:
1842 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1843 _cm(cm), _task(task), _is_serial(is_serial),
1844 _ref_counter_limit(G1RefProcDrainInterval) {
1845 assert(_ref_counter_limit > 0, "sanity");
1846 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1847 _ref_counter = _ref_counter_limit;
1848 }
1849
1850 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1851 virtual void do_oop( oop* p) { do_oop_work(p); }
1852
1853 template <class T> void do_oop_work(T* p) {
1854 if (!_cm->has_overflown()) {
1855 oop obj = oopDesc::load_decode_heap_oop(p);
1856 _task->deal_with_reference(obj);
1857 _ref_counter--;
1858
1859 if (_ref_counter == 0) {
1860 // We have dealt with _ref_counter_limit references, pushing them
1861 // and objects reachable from them on to the local stack (and
1862 // possibly the global stack). Call G1CMTask::do_marking_step() to
1863 // process these entries.
1864 //
1865 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1866 // there's nothing more to do (i.e. we're done with the entries that
1867 // were pushed as a result of the G1CMTask::deal_with_reference() calls
1868 // above) or we overflow.
1869 //
1870 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1871 // flag while there may still be some work to do. (See the comment at
1872 // the beginning of G1CMTask::do_marking_step() for those conditions -
1873 // one of which is reaching the specified time target.) It is only
1874 // when G1CMTask::do_marking_step() returns without setting the
1875 // has_aborted() flag that the marking step has completed.
1876 do {
1877 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1878 _task->do_marking_step(mark_step_duration_ms,
1879 false /* do_termination */,
1880 _is_serial);
1881 } while (_task->has_aborted() && !_cm->has_overflown());
1882 _ref_counter = _ref_counter_limit;
1883 }
1884 }
1885 }
1886 };
1887
1888 // 'Drain' oop closure used by both serial and parallel reference processing.
1889 // Uses the G1CMTask associated with a given worker thread (for serial
1890 // reference processing the G1CMTask for worker 0 is used). Calls the
1891 // do_marking_step routine, with an unbelievably large timeout value,
1892 // to drain the marking data structures of the remaining entries
1893 // added by the 'keep alive' oop closure above.
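//
// As a sketch of how these closures are wired together (mirroring the
// actual calls in weakRefsWork() below; the serial case is shown):
//   G1CMIsAliveClosure is_alive(g1h);
//   G1CMKeepAliveAndDrainClosure keep_alive(cm, cm->task(0), true /* is_serial */);
//   G1CMDrainMarkingStackClosure drain(cm, cm->task(0), true /* is_serial */);
//   rp->process_discovered_references(&is_alive, &keep_alive, &drain,
//                                     NULL /* no executor when serial */,
//                                     g1h->gc_timer_cm());
// In the parallel case, G1CMRefProcTaskProxy below creates per-worker
// instances of the keep-alive and drain closures instead.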
1894
1895 class G1CMDrainMarkingStackClosure: public VoidClosure {
1896 G1ConcurrentMark* _cm;
1897 G1CMTask* _task;
1898 bool _is_serial;
1899 public:
1900 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1901 _cm(cm), _task(task), _is_serial(is_serial) {
1902 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1903 }
1904
1905 void do_void() {
1906 do {
1907 // We call G1CMTask::do_marking_step() to completely drain the local
1908 // and global marking stacks of entries pushed by the 'keep alive'
1909 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1910 //
1911 // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1912 // if there's nothing more to do (i.e. we've completely drained the
1913 // entries that were pushed as a result of applying the 'keep alive'
1914 // closure to the entries on the discovered ref lists) or we overflow
1915 // the global marking stack.
1916 //
1917 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1918 // flag while there may still be some work to do. (See the comment at
1919 // the beginning of G1CMTask::do_marking_step() for those conditions -
1920 // one of which is reaching the specified time target.) It is only
1921 // when G1CMTask::do_marking_step() returns without setting the
1922 // has_aborted() flag that the marking step has completed.
1923
1924 _task->do_marking_step(1000000000.0 /* something very large */,
1925 true /* do_termination */,
1926 _is_serial);
1927 } while (_task->has_aborted() && !_cm->has_overflown());
1928 }
1929 };
1930
1931 // Implementation of AbstractRefProcTaskExecutor for parallel
1932 // reference processing at the end of G1 concurrent marking
1933
1934 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1935 private:
1936 G1CollectedHeap* _g1h;
1937 G1ConcurrentMark* _cm;
1938 WorkGang* _workers;
1939 uint _active_workers;
1940
1941 public:
1942 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1943 G1ConcurrentMark* cm,
1944 WorkGang* workers,
1945 uint n_workers) :
1946 _g1h(g1h), _cm(cm),
1947 _workers(workers), _active_workers(n_workers) { }
1948
1949 // Executes the given task using concurrent marking worker threads.
1950 virtual void execute(ProcessTask& task); 1951 virtual void execute(EnqueueTask& task); 1952 }; 1953 1954 class G1CMRefProcTaskProxy: public AbstractGangTask { 1955 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1956 ProcessTask& _proc_task; 1957 G1CollectedHeap* _g1h; 1958 G1ConcurrentMark* _cm; 1959 1960 public: 1961 G1CMRefProcTaskProxy(ProcessTask& proc_task, 1962 G1CollectedHeap* g1h, 1963 G1ConcurrentMark* cm) : 1964 AbstractGangTask("Process reference objects in parallel"), 1965 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 1966 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1967 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 1968 } 1969 1970 virtual void work(uint worker_id) { 1971 ResourceMark rm; 1972 HandleMark hm; 1973 G1CMTask* task = _cm->task(worker_id); 1974 G1CMIsAliveClosure g1_is_alive(_g1h); 1975 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 1976 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 1977 1978 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1979 } 1980 }; 1981 1982 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 1983 assert(_workers != NULL, "Need parallel worker threads."); 1984 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1985 1986 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 1987 1988 // We need to reset the concurrency level before each 1989 // proxy task execution, so that the termination protocol 1990 // and overflow handling in G1CMTask::do_marking_step() knows 1991 // how many workers to wait for. 1992 _cm->set_concurrency(_active_workers); 1993 _workers->run_task(&proc_task_proxy); 1994 } 1995 1996 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 1997 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1998 EnqueueTask& _enq_task; 1999 2000 public: 2001 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2002 AbstractGangTask("Enqueue reference objects in parallel"), 2003 _enq_task(enq_task) { } 2004 2005 virtual void work(uint worker_id) { 2006 _enq_task.work(worker_id); 2007 } 2008 }; 2009 2010 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2011 assert(_workers != NULL, "Need parallel worker threads."); 2012 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2013 2014 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2015 2016 // Not strictly necessary but... 2017 // 2018 // We need to reset the concurrency level before each 2019 // proxy task execution, so that the termination protocol 2020 // and overflow handling in G1CMTask::do_marking_step() knows 2021 // how many workers to wait for. 2022 _cm->set_concurrency(_active_workers); 2023 _workers->run_task(&enq_task_proxy); 2024 } 2025 2026 void G1ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2027 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2028 } 2029 2030 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2031 if (has_overflown()) { 2032 // Skip processing the discovered references if we have 2033 // overflown the global marking stack. Reference objects 2034 // only get discovered once so it is OK to not 2035 // de-populate the discovered reference lists. We could have, 2036 // but the only benefit would be that, when marking restarts, 2037 // less reference objects are discovered. 
2038 return;
2039 }
2040
2041 ResourceMark rm;
2042 HandleMark hm;
2043
2044 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2045
2046 // Is alive closure.
2047 G1CMIsAliveClosure g1_is_alive(g1h);
2048
2049 // Inner scope to exclude the cleaning of the string and symbol
2050 // tables from the displayed time.
2051 {
2052 GCTraceTime(Debug, gc) trace("Reference Processing", g1h->gc_timer_cm());
2053
2054 ReferenceProcessor* rp = g1h->ref_processor_cm();
2055
2056 // See the comment in G1CollectedHeap::ref_processing_init()
2057 // about how reference processing currently works in G1.
2058
2059 // Set the soft reference policy
2060 rp->setup_policy(clear_all_soft_refs);
2061 assert(_markStack.isEmpty(), "mark stack should be empty");
2062
2063 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2064 // in serial reference processing. Note these closures are also
2065 // used for serially processing (by the current thread) the
2066 // JNI references during parallel reference processing.
2067 //
2068 // These closures do not need to synchronize with the worker
2069 // threads involved in parallel reference processing as these
2070 // instances are executed serially by the current thread (i.e.
2071 // reference processing is not multi-threaded and is thus
2072 // performed by the current thread instead of a gang worker).
2073 //
2074 // The gang tasks involved in parallel reference processing create
2075 // their own instances of these closures, which do their own
2076 // synchronization among themselves.
2077 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2078 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2079
2080 // We need at least one active thread. If reference processing
2081 // is not multi-threaded we use the current (VMThread) thread,
2082 // otherwise we use the work gang from the G1CollectedHeap and
2083 // we utilize all the worker threads we can.
2084 bool processing_is_mt = rp->processing_is_mt();
2085 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2086 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2087
2088 // Parallel processing task executor.
2089 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2090 g1h->workers(), active_workers);
2091 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2092
2093 // Set the concurrency level. The phase was already set prior to
2094 // executing the remark task.
2095 set_concurrency(active_workers);
2096
2097 // Set the degree of MT processing here. If the discovery was done MT,
2098 // the number of threads involved during discovery could differ from
2099 // the number of active workers. This is OK as long as the discovered
2100 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2101 rp->set_active_mt_degree(active_workers);
2102
2103 // Process the weak references.
2104 const ReferenceProcessorStats& stats =
2105 rp->process_discovered_references(&g1_is_alive,
2106 &g1_keep_alive,
2107 &g1_drain_mark_stack,
2108 executor,
2109 g1h->gc_timer_cm());
2110 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2111
2112 // The do_oop work routines of the keep_alive and drain_marking_stack
2113 // oop closures will set the has_overflown flag if we overflow the
2114 // global marking stack.
2115 2116 assert(_markStack.overflow() || _markStack.isEmpty(), 2117 "mark stack should be empty (unless it overflowed)"); 2118 2119 if (_markStack.overflow()) { 2120 // This should have been done already when we tried to push an 2121 // entry on to the global mark stack. But let's do it again. 2122 set_has_overflown(); 2123 } 2124 2125 assert(rp->num_q() == active_workers, "why not"); 2126 2127 rp->enqueue_discovered_references(executor); 2128 2129 rp->verify_no_references_recorded(); 2130 assert(!rp->discovery_enabled(), "Post condition"); 2131 } 2132 2133 if (has_overflown()) { 2134 // We can not trust g1_is_alive if the marking stack overflowed 2135 return; 2136 } 2137 2138 assert(_markStack.isEmpty(), "Marking should have completed"); 2139 2140 // Unload Klasses, String, Symbols, Code Cache, etc. 2141 { 2142 GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm()); 2143 2144 if (ClassUnloadingWithConcurrentMark) { 2145 bool purged_classes; 2146 2147 { 2148 GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm()); 2149 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2150 } 2151 2152 { 2153 GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm()); 2154 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2155 } 2156 } 2157 2158 if (G1StringDedup::is_enabled()) { 2159 GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm()); 2160 G1StringDedup::unlink(&g1_is_alive); 2161 } 2162 } 2163 } 2164 2165 void G1ConcurrentMark::swapMarkBitMaps() { 2166 G1CMBitMapRO* temp = _prevMarkBitMap; 2167 _prevMarkBitMap = (G1CMBitMapRO*)_nextMarkBitMap; 2168 _nextMarkBitMap = (G1CMBitMap*) temp; 2169 } 2170 2171 // Closure for marking entries in SATB buffers. 2172 class G1CMSATBBufferClosure : public SATBBufferClosure { 2173 private: 2174 G1CMTask* _task; 2175 G1CollectedHeap* _g1h; 2176 2177 // This is very similar to G1CMTask::deal_with_reference, but with 2178 // more relaxed requirements for the argument, so this must be more 2179 // circumspect about treating the argument as an object. 2180 void do_entry(void* entry) const { 2181 _task->increment_refs_reached(); 2182 HeapRegion* hr = _g1h->heap_region_containing(entry); 2183 if (entry < hr->next_top_at_mark_start()) { 2184 // Until we get here, we don't know whether entry refers to a valid 2185 // object; it could instead have been a stale reference. 
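// Entries at or above NTAMS are exactly the ones that may be stale, which
// is why the check above filters on next_top_at_mark_start(); an entry
// below NTAMS should refer to a valid object, as the assert below verifies.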
2186 oop obj = static_cast<oop>(entry);
2187 assert(obj->is_oop(true /* ignore mark word */),
2188 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2189 _task->make_reference_grey(obj, hr);
2190 }
2191 }
2192
2193 public:
2194 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
2195 : _task(task), _g1h(g1h) { }
2196
2197 virtual void do_buffer(void** buffer, size_t size) {
2198 for (size_t i = 0; i < size; ++i) {
2199 do_entry(buffer[i]);
2200 }
2201 }
2202 };
2203
2204 class G1RemarkThreadsClosure : public ThreadClosure {
2205 G1CMSATBBufferClosure _cm_satb_cl;
2206 G1CMOopClosure _cm_cl;
2207 MarkingCodeBlobClosure _code_cl;
2208 int _thread_parity;
2209
2210 public:
2211 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
2212 _cm_satb_cl(task, g1h),
2213 _cm_cl(g1h, g1h->concurrent_mark(), task),
2214 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2215 _thread_parity(Threads::thread_claim_parity()) {}
2216
2217 void do_thread(Thread* thread) {
2218 if (thread->is_Java_thread()) {
2219 if (thread->claim_oops_do(true, _thread_parity)) {
2220 JavaThread* jt = (JavaThread*)thread;
2221
2222 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2223 // however, oops reachable from nmethods have very complex lifecycles:
2224 // * Alive if on the stack of an executing method
2225 // * Weakly reachable otherwise
2226 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2227 // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2228 jt->nmethods_do(&_code_cl);
2229
2230 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2231 }
2232 } else if (thread->is_VM_thread()) {
2233 if (thread->claim_oops_do(true, _thread_parity)) {
2234 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2235 }
2236 }
2237 }
2238 };
2239
2240 class G1CMRemarkTask: public AbstractGangTask {
2241 private:
2242 G1ConcurrentMark* _cm;
2243 public:
2244 void work(uint worker_id) {
2245 // Since all available tasks are actually started, we should
2246 // only proceed if we're supposed to be active.
2247 if (worker_id < _cm->active_tasks()) {
2248 G1CMTask* task = _cm->task(worker_id);
2249 task->record_start_time();
2250 {
2251 ResourceMark rm;
2252 HandleMark hm;
2253
2254 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2255 Threads::threads_do(&threads_f);
2256 }
2257
2258 do {
2259 task->do_marking_step(1000000000.0 /* something very large */,
2260 true /* do_termination */,
2261 false /* is_serial */);
2262 } while (task->has_aborted() && !_cm->has_overflown());
2263 // If we overflow, then we do not want to restart. We instead
2264 // want to abort remark and do concurrent marking again.
2265 task->record_end_time();
2266 }
2267 }
2268
2269 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
2270 AbstractGangTask("Par Remark"), _cm(cm) {
2271 _cm->terminator()->reset_for_reuse(active_workers);
2272 }
2273 };
2274
2275 void G1ConcurrentMark::checkpointRootsFinalWork() {
2276 ResourceMark rm;
2277 HandleMark hm;
2278 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2279
2280 GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
2281
2282 g1h->ensure_parsability(false);
2283
2284 // this is remark, so we'll use up all active threads
2285 uint active_workers = g1h->workers()->active_workers();
2286 set_concurrency_and_phase(active_workers, false /* concurrent */);
2287 // Leave _parallel_marking_threads at its
2288 // value originally calculated in the G1ConcurrentMark
2289 // constructor and pass values of the active workers
2290 // through the gang in the task.
2291
2292 {
2293 StrongRootsScope srs(active_workers);
2294
2295 G1CMRemarkTask remarkTask(this, active_workers);
2296 // We will start all available threads, even if we decide that the
2297 // active_workers will be fewer. The extra ones will just bail out
2298 // immediately.
2299 g1h->workers()->run_task(&remarkTask);
2300 }
2301
2302 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2303 guarantee(has_overflown() ||
2304 satb_mq_set.completed_buffers_num() == 0,
2305 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
2306 BOOL_TO_STR(has_overflown()),
2307 satb_mq_set.completed_buffers_num());
2308
2309 print_stats();
2310 }
2311
2312 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2313 // Note we are overriding the read-only view of the prev map here, via
2314 // the cast.
2315 ((G1CMBitMap*)_prevMarkBitMap)->clear_range(mr);
2316 }
2317
2318 HeapRegion*
2319 G1ConcurrentMark::claim_region(uint worker_id) {
2320 // "checkpoint" the finger
2321 HeapWord* finger = _finger;
2322
2323 // _heap_end will not change underneath our feet; it only changes at
2324 // yield points.
2325 while (finger < _heap_end) {
2326 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2327
2328 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2329
2330 // Above, heap_region_containing may return NULL as we always claim
2331 // regions up to the end of the heap. In this case, just jump to the next region.
2332 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2333
2334 // Is the gap between reading the finger and doing the CAS too long?
2335 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2336 if (res == finger && curr_region != NULL) {
2337 // we succeeded
2338 HeapWord* bottom = curr_region->bottom();
2339 HeapWord* limit = curr_region->next_top_at_mark_start();
2340
2341 // notice that _finger == end cannot be guaranteed here, since
2342 // someone else might have moved the finger even further
2343 assert(_finger >= end, "the finger should have moved forward");
2344
2345 if (limit > bottom) {
2346 return curr_region;
2347 } else {
2348 assert(limit == bottom,
2349 "the region limit should be at bottom");
2350 // we return NULL and the caller should try calling
2351 // claim_region() again.
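// (A sketch of the typical caller pattern, not additional code:
// do_marking_step() retries along the lines of
//   HeapRegion* claimed = _cm->claim_region(_worker_id);
//   if (claimed != NULL) { setup_for_region(claimed); }
//   else { /* re-check the abort conditions and try again */ }
// until it either claims a region or runs out of heap.)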
2352 return NULL; 2353 } 2354 } else { 2355 assert(_finger > finger, "the finger should have moved forward"); 2356 // read it again 2357 finger = _finger; 2358 } 2359 } 2360 2361 return NULL; 2362 } 2363 2364 #ifndef PRODUCT 2365 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2366 private: 2367 G1CollectedHeap* _g1h; 2368 const char* _phase; 2369 int _info; 2370 2371 public: 2372 VerifyNoCSetOops(const char* phase, int info = -1) : 2373 _g1h(G1CollectedHeap::heap()), 2374 _phase(phase), 2375 _info(info) 2376 { } 2377 2378 void operator()(oop obj) const { 2379 guarantee(obj->is_oop(), 2380 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2381 p2i(obj), _phase, _info); 2382 guarantee(!_g1h->obj_in_cs(obj), 2383 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2384 p2i(obj), _phase, _info); 2385 } 2386 }; 2387 2388 void G1ConcurrentMark::verify_no_cset_oops() { 2389 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2390 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2391 return; 2392 } 2393 2394 // Verify entries on the global mark stack 2395 _markStack.iterate(VerifyNoCSetOops("Stack")); 2396 2397 // Verify entries on the task queues 2398 for (uint i = 0; i < _max_worker_id; ++i) { 2399 G1CMTaskQueue* queue = _task_queues->queue(i); 2400 queue->iterate(VerifyNoCSetOops("Queue", i)); 2401 } 2402 2403 // Verify the global finger 2404 HeapWord* global_finger = finger(); 2405 if (global_finger != NULL && global_finger < _heap_end) { 2406 // Since we always iterate over all regions, we might get a NULL HeapRegion 2407 // here. 2408 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2409 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2410 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2411 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2412 } 2413 2414 // Verify the task fingers 2415 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2416 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2417 G1CMTask* task = _tasks[i]; 2418 HeapWord* task_finger = task->finger(); 2419 if (task_finger != NULL && task_finger < _heap_end) { 2420 // See above note on the global finger verification. 2421 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2422 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2423 !task_hr->in_collection_set(), 2424 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2425 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2426 } 2427 } 2428 } 2429 #endif // PRODUCT 2430 2431 // Aggregate the counting data that was constructed concurrently 2432 // with marking. 
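// Per region, the aggregation amounts to (a sketch of the arithmetic, not
// additional code):
//   marked_bytes(hr) = sum over workers w in [0, _max_worker_id) of
//                      count_marked_bytes_array_for(w)[hr->hrm_index()]
// plus OR-ing each worker's card bitmap over [start_idx, limit_idx) into
// the single global card bitmap.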
2433 class AggregateCountDataHRClosure: public HeapRegionClosure {
2434 G1CollectedHeap* _g1h;
2435 G1ConcurrentMark* _cm;
2436 CardTableModRefBS* _ct_bs;
2437 BitMap* _cm_card_bm;
2438 uint _max_worker_id;
2439
2440 public:
2441 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2442 BitMap* cm_card_bm,
2443 uint max_worker_id) :
2444 _g1h(g1h), _cm(g1h->concurrent_mark()),
2445 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2446 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2447
2448 bool doHeapRegion(HeapRegion* hr) {
2449 HeapWord* start = hr->bottom();
2450 HeapWord* limit = hr->next_top_at_mark_start();
2451 HeapWord* end = hr->end();
2452
2453 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2454 "Preconditions not met - "
2455 "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2456 "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2457 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2458
2459 assert(hr->next_marked_bytes() == 0, "Precondition");
2460
2461 if (start == limit) {
2462 // NTAMS of this region has not been set so nothing to do.
2463 return false;
2464 }
2465
2466 // 'start' should be in the heap.
2467 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2468 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2469 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2470
2471 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2472 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2473 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2474
2475 // If ntams is not card aligned then we bump the card bitmap index
2476 // for limit so that we get all the cards spanned by
2477 // the object ending at ntams.
2478 // Note: if this is the last region in the heap then ntams
2479 // could actually be just beyond the end of the heap;
2480 // limit_idx will then correspond to a (non-existent) card
2481 // that is also outside the heap.
2482 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2483 limit_idx += 1;
2484 }
2485
2486 assert(limit_idx <= end_idx, "or else use atomics");
2487
2488 // Aggregate the "stripe" in the count data associated with hr.
2489 uint hrm_index = hr->hrm_index();
2490 size_t marked_bytes = 0;
2491
2492 for (uint i = 0; i < _max_worker_id; i += 1) {
2493 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2494 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2495
2496 // Fetch the marked_bytes in this region for task i and
2497 // add it to the running total for this region.
2498 marked_bytes += marked_bytes_array[hrm_index];
2499
2500 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2501 // into the global card bitmap.
2502 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2503
2504 while (scan_idx < limit_idx) {
2505 assert(task_card_bm->at(scan_idx) == true, "should be");
2506 _cm_card_bm->set_bit(scan_idx);
2507 assert(_cm_card_bm->at(scan_idx) == true, "should be");
2508
2509 // BitMap::get_next_one_offset() can handle the case when
2510 // its left_offset parameter is greater than its right_offset
2511 // parameter. It does, however, have an early exit if
2512 // left_offset == right_offset. So let's limit the value
2513 // passed in for left offset here.
2514 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2515 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2516 } 2517 } 2518 2519 // Update the marked bytes for this region. 2520 hr->add_to_marked_bytes(marked_bytes); 2521 2522 // Next heap region 2523 return false; 2524 } 2525 }; 2526 2527 class G1AggregateCountDataTask: public AbstractGangTask { 2528 protected: 2529 G1CollectedHeap* _g1h; 2530 G1ConcurrentMark* _cm; 2531 BitMap* _cm_card_bm; 2532 uint _max_worker_id; 2533 uint _active_workers; 2534 HeapRegionClaimer _hrclaimer; 2535 2536 public: 2537 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2538 G1ConcurrentMark* cm, 2539 BitMap* cm_card_bm, 2540 uint max_worker_id, 2541 uint n_workers) : 2542 AbstractGangTask("Count Aggregation"), 2543 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2544 _max_worker_id(max_worker_id), 2545 _active_workers(n_workers), 2546 _hrclaimer(_active_workers) { 2547 } 2548 2549 void work(uint worker_id) { 2550 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2551 2552 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2553 } 2554 }; 2555 2556 2557 void G1ConcurrentMark::aggregate_count_data() { 2558 uint n_workers = _g1h->workers()->active_workers(); 2559 2560 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2561 _max_worker_id, n_workers); 2562 2563 _g1h->workers()->run_task(&g1_par_agg_task); 2564 } 2565 2566 // Clear the per-worker arrays used to store the per-region counting data 2567 void G1ConcurrentMark::clear_all_count_data() { 2568 // Clear the global card bitmap - it will be filled during 2569 // liveness count aggregation (during remark) and the 2570 // final counting task. 2571 _card_bm.clear(); 2572 2573 // Clear the global region bitmap - it will be filled as part 2574 // of the final counting task. 2575 _region_bm.clear(); 2576 2577 uint max_regions = _g1h->max_regions(); 2578 assert(_max_worker_id > 0, "uninitialized"); 2579 2580 for (uint i = 0; i < _max_worker_id; i += 1) { 2581 BitMap* task_card_bm = count_card_bitmap_for(i); 2582 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2583 2584 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2585 assert(marked_bytes_array != NULL, "uninitialized"); 2586 2587 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2588 task_card_bm->clear(); 2589 } 2590 } 2591 2592 void G1ConcurrentMark::print_stats() { 2593 if (!log_is_enabled(Debug, gc, stats)) { 2594 return; 2595 } 2596 log_debug(gc, stats)("---------------------------------------------------------------------"); 2597 for (size_t i = 0; i < _active_tasks; ++i) { 2598 _tasks[i]->print_stats(); 2599 log_debug(gc, stats)("---------------------------------------------------------------------"); 2600 } 2601 } 2602 2603 // abandon current marking iteration due to a Full GC 2604 void G1ConcurrentMark::abort() { 2605 if (!cmThread()->during_cycle() || _has_aborted) { 2606 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2607 return; 2608 } 2609 2610 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2611 // concurrent bitmap clearing. 2612 clear_bitmap(_nextMarkBitMap, _g1h->workers(), false); 2613 2614 // Note we cannot clear the previous marking bitmap here 2615 // since VerifyDuringGC verifies the objects marked during 2616 // a full GC against the previous bitmap. 
2617 2618 // Clear the liveness counting data 2619 clear_all_count_data(); 2620 // Empty mark stack 2621 reset_marking_state(); 2622 for (uint i = 0; i < _max_worker_id; ++i) { 2623 _tasks[i]->clear_region_fields(); 2624 } 2625 _first_overflow_barrier_sync.abort(); 2626 _second_overflow_barrier_sync.abort(); 2627 _has_aborted = true; 2628 2629 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2630 satb_mq_set.abandon_partial_marking(); 2631 // This can be called either during or outside marking, we'll read 2632 // the expected_active value from the SATB queue set. 2633 satb_mq_set.set_active_all_threads( 2634 false, /* new active value */ 2635 satb_mq_set.is_active() /* expected_active */); 2636 2637 _g1h->trace_heap_after_concurrent_cycle(); 2638 2639 _g1h->register_concurrent_cycle_end(); 2640 } 2641 2642 static void print_ms_time_info(const char* prefix, const char* name, 2643 NumberSeq& ns) { 2644 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2645 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2646 if (ns.num() > 0) { 2647 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2648 prefix, ns.sd(), ns.maximum()); 2649 } 2650 } 2651 2652 void G1ConcurrentMark::print_summary_info() { 2653 LogHandle(gc, marking) log; 2654 if (!log.is_trace()) { 2655 return; 2656 } 2657 2658 log.trace(" Concurrent marking:"); 2659 print_ms_time_info(" ", "init marks", _init_times); 2660 print_ms_time_info(" ", "remarks", _remark_times); 2661 { 2662 print_ms_time_info(" ", "final marks", _remark_mark_times); 2663 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2664 2665 } 2666 print_ms_time_info(" ", "cleanups", _cleanup_times); 2667 log.trace(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2668 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2669 if (G1ScrubRemSets) { 2670 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2671 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2672 } 2673 log.trace(" Total stop_world time = %8.2f s.", 2674 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2675 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2676 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2677 } 2678 2679 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2680 _parallel_workers->print_worker_threads_on(st); 2681 } 2682 2683 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2684 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2685 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2686 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2687 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2688 } 2689 2690 // We take a break if someone is trying to stop the world. 
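// A hypothetical caller sketch (concurrent phases are expected to call
// this between units of work; the names below match this file):
//   if (cm->do_yield_check(worker_id)) {
//     // We yielded at a safepoint; abort/overflow flags may now be set.
//   }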
2691 bool G1ConcurrentMark::do_yield_check(uint worker_id) { 2692 if (SuspendibleThreadSet::should_yield()) { 2693 SuspendibleThreadSet::yield(); 2694 return true; 2695 } else { 2696 return false; 2697 } 2698 } 2699 2700 // Closure for iteration over bitmaps 2701 class G1CMBitMapClosure : public BitMapClosure { 2702 private: 2703 // the bitmap that is being iterated over 2704 G1CMBitMap* _nextMarkBitMap; 2705 G1ConcurrentMark* _cm; 2706 G1CMTask* _task; 2707 2708 public: 2709 G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) : 2710 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2711 2712 bool do_bit(size_t offset) { 2713 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2714 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2715 assert( addr < _cm->finger(), "invariant"); 2716 assert(addr >= _task->finger(), "invariant"); 2717 2718 // We move that task's local finger along. 2719 _task->move_finger_to(addr); 2720 2721 _task->scan_object(oop(addr)); 2722 // we only partially drain the local queue and global stack 2723 _task->drain_local_queue(true); 2724 _task->drain_global_stack(true); 2725 2726 // if the has_aborted flag has been raised, we need to bail out of 2727 // the iteration 2728 return !_task->has_aborted(); 2729 } 2730 }; 2731 2732 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2733 ReferenceProcessor* result = g1h->ref_processor_cm(); 2734 assert(result != NULL, "CM reference processor should not be NULL"); 2735 return result; 2736 } 2737 2738 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2739 G1ConcurrentMark* cm, 2740 G1CMTask* task) 2741 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2742 _g1h(g1h), _cm(cm), _task(task) 2743 { } 2744 2745 void G1CMTask::setup_for_region(HeapRegion* hr) { 2746 assert(hr != NULL, 2747 "claim_region() should have filtered out NULL regions"); 2748 _curr_region = hr; 2749 _finger = hr->bottom(); 2750 update_region_limit(); 2751 } 2752 2753 void G1CMTask::update_region_limit() { 2754 HeapRegion* hr = _curr_region; 2755 HeapWord* bottom = hr->bottom(); 2756 HeapWord* limit = hr->next_top_at_mark_start(); 2757 2758 if (limit == bottom) { 2759 // The region was collected underneath our feet. 2760 // We set the finger to bottom to ensure that the bitmap 2761 // iteration that will follow this will not do anything. 2762 // (this is not a condition that holds when we set the region up, 2763 // as the region is not supposed to be empty in the first place) 2764 _finger = bottom; 2765 } else if (limit >= _region_limit) { 2766 assert(limit >= _finger, "peace of mind"); 2767 } else { 2768 assert(limit < _region_limit, "only way to get here"); 2769 // This can happen under some pretty unusual circumstances. An 2770 // evacuation pause empties the region underneath our feet (NTAMS 2771 // at bottom). We then do some allocation in the region (NTAMS 2772 // stays at bottom), followed by the region being used as a GC 2773 // alloc region (NTAMS will move to top() and the objects 2774 // originally below it will be grayed). All objects now marked in 2775 // the region are explicitly grayed, if below the global finger, 2776 // and we do not need in fact to scan anything else. So, we simply 2777 // set _finger to be limit to ensure that the bitmap iteration 2778 // doesn't do anything. 
2779 _finger = limit;
2780 }
2781
2782 _region_limit = limit;
2783 }
2784
2785 void G1CMTask::giveup_current_region() {
2786 assert(_curr_region != NULL, "invariant");
2787 clear_region_fields();
2788 }
2789
2790 void G1CMTask::clear_region_fields() {
2791 // Values for these three fields that indicate that we're not
2792 // holding on to a region.
2793 _curr_region = NULL;
2794 _finger = NULL;
2795 _region_limit = NULL;
2796 }
2797
2798 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2799 if (cm_oop_closure == NULL) {
2800 assert(_cm_oop_closure != NULL, "invariant");
2801 } else {
2802 assert(_cm_oop_closure == NULL, "invariant");
2803 }
2804 _cm_oop_closure = cm_oop_closure;
2805 }
2806
2807 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2808 guarantee(nextMarkBitMap != NULL, "invariant");
2809 _nextMarkBitMap = nextMarkBitMap;
2810 clear_region_fields();
2811
2812 _calls = 0;
2813 _elapsed_time_ms = 0.0;
2814 _termination_time_ms = 0.0;
2815 _termination_start_time_ms = 0.0;
2816 }
2817
2818 bool G1CMTask::should_exit_termination() {
2819 regular_clock_call();
2820 // This is called when we are in the termination protocol. We should
2821 // quit if, for some reason, this task wants to abort or the global
2822 // stack is not empty (this means that we can get work from it).
2823 return !_cm->mark_stack_empty() || has_aborted();
2824 }
2825
2826 void G1CMTask::reached_limit() {
2827 assert(_words_scanned >= _words_scanned_limit ||
2828 _refs_reached >= _refs_reached_limit ,
2829 "shouldn't have been called otherwise");
2830 regular_clock_call();
2831 }
2832
2833 void G1CMTask::regular_clock_call() {
2834 if (has_aborted()) return;
2835
2836 // First, we need to recalculate the words scanned and refs reached
2837 // limits for the next clock call.
2838 recalculate_limits();
2839
2840 // During the regular clock call we do the following:
2841
2842 // (1) If an overflow has been flagged, then we abort.
2843 if (_cm->has_overflown()) {
2844 set_has_aborted();
2845 return;
2846 }
2847
2848 // If we are not concurrent (i.e. we're doing remark) we don't need
2849 // to check anything else. The other steps are only needed during
2850 // the concurrent marking phase.
2851 if (!concurrent()) return;
2852
2853 // (2) If marking has been aborted for Full GC, then we also abort.
2854 if (_cm->has_aborted()) {
2855 set_has_aborted();
2856 return;
2857 }
2858
2859 double curr_time_ms = os::elapsedVTime() * 1000.0;
2860
2861 // (3) We check whether we should yield. If we have to, then we abort.
2862 if (SuspendibleThreadSet::should_yield()) {
2863 // We should yield. To do this we abort the task. The caller is
2864 // responsible for yielding.
2865 set_has_aborted();
2866 return;
2867 }
2868
2869 // (4) We check whether we've reached our time quota. If we have,
2870 // then we abort.
2871 double elapsed_time_ms = curr_time_ms - _start_time_ms;
2872 if (elapsed_time_ms > _time_target_ms) {
2873 set_has_aborted();
2874 _has_timed_out = true;
2875 return;
2876 }
2877
2878 // (5) Finally, we check whether there are enough completed SATB
2879 // buffers available for processing. If there are, we abort.
2880 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2881 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2882 // we do need to process SATB buffers, we'll abort and restart 2883 // the marking task to do so 2884 set_has_aborted(); 2885 return; 2886 } 2887 } 2888 2889 void G1CMTask::recalculate_limits() { 2890 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2891 _words_scanned_limit = _real_words_scanned_limit; 2892 2893 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2894 _refs_reached_limit = _real_refs_reached_limit; 2895 } 2896 2897 void G1CMTask::decrease_limits() { 2898 // This is called when we believe that we're going to do an infrequent 2899 // operation which will increase the per byte scanned cost (i.e. move 2900 // entries to/from the global stack). It basically tries to decrease the 2901 // scanning limit so that the clock is called earlier. 2902 2903 _words_scanned_limit = _real_words_scanned_limit - 2904 3 * words_scanned_period / 4; 2905 _refs_reached_limit = _real_refs_reached_limit - 2906 3 * refs_reached_period / 4; 2907 } 2908 2909 void G1CMTask::move_entries_to_global_stack() { 2910 // local array where we'll store the entries that will be popped 2911 // from the local queue 2912 oop buffer[global_stack_transfer_size]; 2913 2914 int n = 0; 2915 oop obj; 2916 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 2917 buffer[n] = obj; 2918 ++n; 2919 } 2920 2921 if (n > 0) { 2922 // we popped at least one entry from the local queue 2923 2924 if (!_cm->mark_stack_push(buffer, n)) { 2925 set_has_aborted(); 2926 } 2927 } 2928 2929 // this operation was quite expensive, so decrease the limits 2930 decrease_limits(); 2931 } 2932 2933 void G1CMTask::get_entries_from_global_stack() { 2934 // local array where we'll store the entries that will be popped 2935 // from the global stack. 2936 oop buffer[global_stack_transfer_size]; 2937 int n; 2938 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 2939 assert(n <= global_stack_transfer_size, 2940 "we should not pop more than the given limit"); 2941 if (n > 0) { 2942 // yes, we did actually pop at least one entry 2943 for (int i = 0; i < n; ++i) { 2944 bool success = _task_queue->push(buffer[i]); 2945 // We only call this when the local queue is empty or under a 2946 // given target limit. So, we do not expect this push to fail. 2947 assert(success, "invariant"); 2948 } 2949 } 2950 2951 // this operation was quite expensive, so decrease the limits 2952 decrease_limits(); 2953 } 2954 2955 void G1CMTask::drain_local_queue(bool partially) { 2956 if (has_aborted()) return; 2957 2958 // Decide what the target size is, depending whether we're going to 2959 // drain it partially (so that other tasks can steal if they run out 2960 // of things to do) or totally (at the very end). 
2961 size_t target_size;
2962 if (partially) {
2963 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2964 } else {
2965 target_size = 0;
2966 }
2967
2968 if (_task_queue->size() > target_size) {
2969 oop obj;
2970 bool ret = _task_queue->pop_local(obj);
2971 while (ret) {
2972 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
2973 assert(!_g1h->is_on_master_free_list(
2974 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2975
2976 scan_object(obj);
2977
2978 if (_task_queue->size() <= target_size || has_aborted()) {
2979 ret = false;
2980 } else {
2981 ret = _task_queue->pop_local(obj);
2982 }
2983 }
2984 }
2985 }
2986
2987 void G1CMTask::drain_global_stack(bool partially) {
2988 if (has_aborted()) return;
2989
2990 // We have a policy to drain the local queue before we attempt to
2991 // drain the global stack.
2992 assert(partially || _task_queue->size() == 0, "invariant");
2993
2994 // Decide what the target size is, depending whether we're going to
2995 // drain it partially (so that other tasks can steal if they run out
2996 // of things to do) or totally (at the very end). Notice that,
2997 // because we move entries from the global stack in chunks or
2998 // because another task might be doing the same, we might in fact
2999 // drop below the target. But, this is not a problem.
3000 size_t target_size;
3001 if (partially) {
3002 target_size = _cm->partial_mark_stack_size_target();
3003 } else {
3004 target_size = 0;
3005 }
3006
3007 if (_cm->mark_stack_size() > target_size) {
3008 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3009 get_entries_from_global_stack();
3010 drain_local_queue(partially);
3011 }
3012 }
3013 }
3014
3015 // The SATB queue has several assumptions on whether to call the par or
3016 // non-par versions of the methods. This is why some of the code is
3017 // replicated. We should really get rid of the single-threaded version
3018 // of the code to simplify things.
3019 void G1CMTask::drain_satb_buffers() {
3020 if (has_aborted()) return;
3021
3022 // We set this so that the regular clock knows that we're in the
3023 // middle of draining buffers and doesn't set the abort flag when it
3024 // notices that SATB buffers are available for draining. It'd be
3025 // very counterproductive if it did that. :-)
3026 _draining_satb_buffers = true;
3027
3028 G1CMSATBBufferClosure satb_cl(this, _g1h);
3029 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3030
3031 // This keeps claiming and applying the closure to completed buffers
3032 // until we run out of buffers or we need to abort.
3033 while (!has_aborted() && 3034 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 3035 regular_clock_call(); 3036 } 3037 3038 _draining_satb_buffers = false; 3039 3040 assert(has_aborted() || 3041 concurrent() || 3042 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3043 3044 // again, this was a potentially expensive operation, decrease the 3045 // limits to get the regular clock call early 3046 decrease_limits(); 3047 } 3048 3049 void G1CMTask::print_stats() { 3050 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 3051 _worker_id, _calls); 3052 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3053 _elapsed_time_ms, _termination_time_ms); 3054 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3055 _step_times_ms.num(), _step_times_ms.avg(), 3056 _step_times_ms.sd()); 3057 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 3058 _step_times_ms.maximum(), _step_times_ms.sum()); 3059 } 3060 3061 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) { 3062 return _task_queues->steal(worker_id, hash_seed, obj); 3063 } 3064 3065 /***************************************************************************** 3066 3067 The do_marking_step(time_target_ms, ...) method is the building 3068 block of the parallel marking framework. It can be called in parallel 3069 with other invocations of do_marking_step() on different tasks 3070 (but only one per task, obviously) and concurrently with the 3071 mutator threads, or during remark, hence it eliminates the need 3072 for two versions of the code. When called during remark, it will 3073 pick up from where the task left off during the concurrent marking 3074 phase. Interestingly, tasks are also claimable during evacuation 3075 pauses too, since do_marking_step() ensures that it aborts before 3076 it needs to yield. 3077 3078 The data structures that it uses to do marking work are the 3079 following: 3080 3081 (1) Marking Bitmap. If there are gray objects that appear only 3082 on the bitmap (this happens either when dealing with an overflow 3083 or when the initial marking phase has simply marked the roots 3084 and didn't push them on the stack), then tasks claim heap 3085 regions whose bitmap they then scan to find gray objects. A 3086 global finger indicates where the end of the last claimed region 3087 is. A local finger indicates how far into the region a task has 3088 scanned. The two fingers are used to determine how to gray an 3089 object (i.e. whether simply marking it is OK, as it will be 3090 visited by a task in the future, or whether it needs to be also 3091 pushed on a stack). 3092 3093 (2) Local Queue. The local queue of the task which is accessed 3094 reasonably efficiently by the task. Other tasks can steal from 3095 it when they run out of work. Throughout the marking phase, a 3096 task attempts to keep its local queue short but not totally 3097 empty, so that entries are available for stealing by other 3098 tasks. Only when there is no more work, a task will totally 3099 drain its local queue. 3100 3101 (3) Global Mark Stack. This handles local queue overflow. During 3102 marking only sets of entries are moved between it and the local 3103 queues, as access to it requires a mutex and more fine-grain 3104 interaction with it which might cause contention. If it 3105 overflows, then the marking phase should restart and iterate 3106 over the bitmap to identify gray objects. 
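(Entries move between a task's local queue and the global stack in
batches of global_stack_transfer_size oops - see
move_entries_to_global_stack() and get_entries_from_global_stack()
above - so a transfer is, as a sketch, popping up to that many entries
into a local buffer and then issuing a single mark_stack_push(buffer, n)
or mark_stack_pop(buffer, global_stack_transfer_size, &n) call.)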
Throughout the marking
3107 phase, tasks attempt to keep the global mark stack at a small
3108 length but not totally empty, so that entries are available for
3109 popping by other tasks. Only when there is no more work, tasks
3110 will totally drain the global mark stack.
3111
3112 (4) SATB Buffer Queue. This is where completed SATB buffers are
3113 made available. Buffers are regularly removed from this queue
3114 and scanned for roots, so that the queue doesn't get too
3115 long. During remark, all completed buffers are processed, as
3116 well as the filled-in parts of any uncompleted buffers.
3117
3118 The do_marking_step() method tries to abort when the time target
3119 has been reached. There are a few other cases when the
3120 do_marking_step() method also aborts:
3121
3122 (1) When the marking phase has been aborted (after a Full GC).
3123
3124 (2) When a global overflow (on the global stack) has been
3125 triggered. Before the task aborts, it will actually sync up with
3126 the other tasks to ensure that all the marking data structures
3127 (local queues, stacks, fingers etc.) are re-initialized so that
3128 when do_marking_step() completes, the marking phase can
3129 immediately restart.
3130
3131 (3) When enough completed SATB buffers are available. The
3132 do_marking_step() method only tries to drain SATB buffers right
3133 at the beginning. So, if enough buffers are available, the
3134 marking step aborts and the SATB buffers are processed at
3135 the beginning of the next invocation.
3136
3137 (4) To yield. When we have to yield, we abort and yield
3138 right at the end of do_marking_step(). This saves us from a lot
3139 of hassle as, by yielding, we might allow a Full GC. If this
3140 happens then objects will be compacted underneath our feet, the
3141 heap might shrink, etc. We save checking for this by just
3142 aborting and doing the yield right at the end.
3143
3144 From the above it follows that the do_marking_step() method should
3145 be called in a loop (or, otherwise, regularly) until it completes.
3146
3147 If a marking step completes without its has_aborted() flag being
3148 true, it means it has completed the current marking phase (and
3149 also all other marking tasks have done so and have all synced up).
3150
3151 A method called regular_clock_call() is invoked "regularly" (in
3152 sub ms intervals) throughout marking. It is this clock method that
3153 checks all the abort conditions which were mentioned above and
3154 decides when the task should abort. A work-based scheme is used to
3155 trigger this clock method: when the number of object words the
3156 marking phase has scanned or the number of references the marking
3157 phase has visited reach a given limit. Additional invocations of
3158 the clock method have been planted in a few other strategic places
3159 too. The initial reason for the clock method was to avoid calling
3160 vtime too regularly, as it is quite expensive. So, once it was in
3161 place, it was natural to piggy-back all the other conditions on it
3162 too and not constantly check them throughout the code.
3163
3164 If do_termination is true then do_marking_step will enter its
3165 termination protocol.
3166
3167 The value of is_serial must be true when do_marking_step is being
3168 called serially (i.e. by the VMThread) and do_marking_step should
3169 skip any synchronization in the termination and overflow code.
3170 Examples include the serial remark code and the serial reference
3171 processing closures.
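For example, the serial drain closure drives it with exactly this
pattern (the same loop appears in G1CMDrainMarkingStackClosure::do_void()
and G1CMRemarkTask::work() earlier in this file):

  do {
    _task->do_marking_step(1000000000.0 /* something very large */,
                           true /* do_termination */,
                           _is_serial);
  } while (_task->has_aborted() && !_cm->has_overflown());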
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // Set up the variables that are used in the work-based scheme to
  // call the regular clock method.
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // Clear all flags.
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack.
  drain_local_queue(true);
  drain_global_stack(true);
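  // Main work loop: each iteration (1) finishes scanning the region this
  // task is currently holding, if any, (2) partially drains the local
  // queue and the global stack, and (3) tries to claim a new region,
  // until we run out of regions or the task aborts.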
  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure.
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object.
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.
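    // A note on the boolean arguments to the drain calls below and
    // elsewhere: passing 'true' requests a partial drain, down to a
    // target length, so that entries remain available for other tasks
    // to steal; the calls passing 'false' near the end of this method
    // drain the queues completely.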
    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one.
        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");
    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");
    while (!has_aborted()) {
      oop obj;
      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
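  // In the parallel case, offer_termination() blocks until all tasks
  // have offered termination, periodically consulting this task's
  // should_exit_termination() override so that it can leave the
  // protocol and resume marking if more work turns up (for example,
  // freshly completed SATB buffers). A serial caller skips the
  // protocol and is considered finished immediately.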
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The G1CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // Let's allow task 0 to do this.
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // We need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task. Our
      // caller will restart it and we can hopefully find more things to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context.
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're during the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }

  _claimed = false;
}

G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   size_t* marked_bytes,
                   BitMap* card_bm,
                   G1CMTaskQueue* task_queue,
                   G1CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
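// For reference, the per-region liveness output produced with these
// macros looks roughly as follows (values are illustrative and column
// widths are elided):
//
//   ### PHASE Post-Marking @ 10.123
//   ### HEAP reserved: 0xf0000000-0x100000000 region-size: 1048576
//   ###
//   ### type address-range used prev-live next-live gc-eff remset code-roots
//   ###                   (bytes)  (bytes)  (bytes) (bytes/ms) (bytes) (bytes)
//   ### OLD 0xf0000000-0xf0100000 1048576 1032192 0 14.5 4096 0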
G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(const char* phase_name)
  : _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "(bytes)");
}

// Takes a pointer to one of the _hum_* fields, deduces the
// corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field is
// less than the region size), and updates the _hum_* field
// accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}
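// Illustrative arithmetic for get_hum_bytes() above: if *hum_bytes
// holds 2.5 * HeapRegion::GrainBytes, three successive calls return
// GrainBytes, GrainBytes and 0.5 * GrainBytes, leaving *hum_bytes at
// zero for the next humongous series.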
// Deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usages to the remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          perc(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_prev_live_bytes),
                          perc(_total_prev_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_next_live_bytes),
                          perc(_total_next_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_strong_code_roots_bytes));
}