1 /* 2 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/metadataOnStackMark.hpp" 27 #include "classfile/symbolTable.hpp" 28 #include "code/codeCache.hpp" 29 #include "gc/g1/concurrentMarkThread.inline.hpp" 30 #include "gc/g1/g1CollectedHeap.inline.hpp" 31 #include "gc/g1/g1CollectorState.hpp" 32 #include "gc/g1/g1ConcurrentMark.inline.hpp" 33 #include "gc/g1/g1HeapVerifier.hpp" 34 #include "gc/g1/g1OopClosures.inline.hpp" 35 #include "gc/g1/g1CardLiveData.inline.hpp" 36 #include "gc/g1/g1Policy.hpp" 37 #include "gc/g1/g1StringDedup.hpp" 38 #include "gc/g1/heapRegion.inline.hpp" 39 #include "gc/g1/heapRegionRemSet.hpp" 40 #include "gc/g1/heapRegionSet.inline.hpp" 41 #include "gc/g1/suspendibleThreadSet.hpp" 42 #include "gc/shared/gcId.hpp" 43 #include "gc/shared/gcTimer.hpp" 44 #include "gc/shared/gcTrace.hpp" 45 #include "gc/shared/gcTraceTime.inline.hpp" 46 #include "gc/shared/genOopClosures.inline.hpp" 47 #include "gc/shared/referencePolicy.hpp" 48 #include "gc/shared/strongRootsScope.hpp" 49 #include "gc/shared/taskqueue.inline.hpp" 50 #include "gc/shared/vmGCOperations.hpp" 51 #include "logging/log.hpp" 52 #include "memory/allocation.hpp" 53 #include "memory/resourceArea.hpp" 54 #include "oops/oop.inline.hpp" 55 #include "runtime/atomic.hpp" 56 #include "runtime/handles.inline.hpp" 57 #include "runtime/java.hpp" 58 #include "runtime/prefetch.inline.hpp" 59 #include "services/memTracker.hpp" 60 #include "utilities/align.hpp" 61 #include "utilities/growableArray.hpp" 62 63 G1CMMarkStack::G1CMMarkStack() : 64 _max_chunk_capacity(0), 65 _base(NULL), 66 _chunk_capacity(0) { 67 set_empty(); 68 } 69 70 bool G1CMMarkStack::resize(size_t new_capacity) { 71 assert(is_empty(), "Only resize when stack is empty."); 72 assert(new_capacity <= _max_chunk_capacity, 73 "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity); 74 75 TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::allocate_or_null(new_capacity); 76 77 if (new_base == NULL) { 78 log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk)); 79 return false; 80 } 81 // Release old mapping. 
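  // Note: the old chunk array (if any) is only unmapped after the new mapping
  // has been reserved successfully, so a failed resize leaves the stack usable.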
82 if (_base != NULL) { 83 MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity); 84 } 85 86 _base = new_base; 87 _chunk_capacity = new_capacity; 88 set_empty(); 89 90 return true; 91 } 92 93 size_t G1CMMarkStack::capacity_alignment() { 94 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry); 95 } 96 97 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) { 98 guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized."); 99 100 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry); 101 102 _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; 103 size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; 104 105 guarantee(initial_chunk_capacity <= _max_chunk_capacity, 106 "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT, 107 _max_chunk_capacity, 108 initial_chunk_capacity); 109 110 log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT, 111 initial_chunk_capacity, _max_chunk_capacity); 112 113 return resize(initial_chunk_capacity); 114 } 115 116 void G1CMMarkStack::expand() { 117 if (_chunk_capacity == _max_chunk_capacity) { 118 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity); 119 return; 120 } 121 size_t old_capacity = _chunk_capacity; 122 // Double capacity if possible 123 size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity); 124 125 if (resize(new_capacity)) { 126 log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 127 old_capacity, new_capacity); 128 } else { 129 log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 130 old_capacity, new_capacity); 131 } 132 } 133 134 G1CMMarkStack::~G1CMMarkStack() { 135 if (_base != NULL) { 136 MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity); 137 } 138 } 139 140 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) { 141 elem->next = *list; 142 *list = elem; 143 } 144 145 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) { 146 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 147 add_chunk_to_list(&_chunk_list, elem); 148 _chunks_in_chunk_list++; 149 } 150 151 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) { 152 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 153 add_chunk_to_list(&_free_list, elem); 154 } 155 156 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) { 157 TaskQueueEntryChunk* result = *list; 158 if (result != NULL) { 159 *list = (*list)->next; 160 } 161 return result; 162 } 163 164 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() { 165 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 166 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list); 167 if (result != NULL) { 168 _chunks_in_chunk_list--; 169 } 170 return result; 171 } 172 173 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() { 174 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 175 return 
remove_chunk_from_list(&_free_list); 176 } 177 178 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() { 179 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code. 180 // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding 181 // wraparound of _hwm. 182 if (_hwm >= _chunk_capacity) { 183 return NULL; 184 } 185 186 size_t cur_idx = Atomic::add(1, &_hwm) - 1; 187 if (cur_idx >= _chunk_capacity) { 188 return NULL; 189 } 190 191 TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk; 192 result->next = NULL; 193 return result; 194 } 195 196 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) { 197 // Get a new chunk. 198 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list(); 199 200 if (new_chunk == NULL) { 201 // Did not get a chunk from the free list. Allocate from backing memory. 202 new_chunk = allocate_new_chunk(); 203 204 if (new_chunk == NULL) { 205 return false; 206 } 207 } 208 209 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry)); 210 211 add_chunk_to_chunk_list(new_chunk); 212 213 return true; 214 } 215 216 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) { 217 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list(); 218 219 if (cur == NULL) { 220 return false; 221 } 222 223 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry)); 224 225 add_chunk_to_free_list(cur); 226 return true; 227 } 228 229 void G1CMMarkStack::set_empty() { 230 _chunks_in_chunk_list = 0; 231 _hwm = 0; 232 _chunk_list = NULL; 233 _free_list = NULL; 234 } 235 236 G1CMRootRegions::G1CMRootRegions() : 237 _cm(NULL), _scan_in_progress(false), 238 _should_abort(false), _claimed_survivor_index(0) { } 239 240 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) { 241 _survivors = survivors; 242 _cm = cm; 243 } 244 245 void G1CMRootRegions::prepare_for_scan() { 246 assert(!scan_in_progress(), "pre-condition"); 247 248 // Currently, only survivors can be root regions. 249 _claimed_survivor_index = 0; 250 _scan_in_progress = _survivors->regions()->is_nonempty(); 251 _should_abort = false; 252 } 253 254 HeapRegion* G1CMRootRegions::claim_next() { 255 if (_should_abort) { 256 // If someone has set the should_abort flag, we return NULL to 257 // force the caller to bail out of their loop. 258 return NULL; 259 } 260 261 // Currently, only survivors can be root regions. 262 const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions(); 263 264 int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1; 265 if (claimed_index < survivor_regions->length()) { 266 return survivor_regions->at(claimed_index); 267 } 268 return NULL; 269 } 270 271 uint G1CMRootRegions::num_root_regions() const { 272 return (uint)_survivors->regions()->length(); 273 } 274 275 void G1CMRootRegions::notify_scan_done() { 276 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 277 _scan_in_progress = false; 278 RootRegionScan_lock->notify_all(); 279 } 280 281 void G1CMRootRegions::cancel_scan() { 282 notify_scan_done(); 283 } 284 285 void G1CMRootRegions::scan_finished() { 286 assert(scan_in_progress(), "pre-condition"); 287 288 // Currently, only survivors can be root regions. 
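  // If the scan was aborted we cannot expect every survivor region to have
  // been claimed, so the claim-index checks below are skipped in that case.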
289 if (!_should_abort) { 290 assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index); 291 assert((uint)_claimed_survivor_index >= _survivors->length(), 292 "we should have claimed all survivors, claimed index = %u, length = %u", 293 (uint)_claimed_survivor_index, _survivors->length()); 294 } 295 296 notify_scan_done(); 297 } 298 299 bool G1CMRootRegions::wait_until_scan_finished() { 300 if (!scan_in_progress()) return false; 301 302 { 303 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 304 while (scan_in_progress()) { 305 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 306 } 307 } 308 return true; 309 } 310 311 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) { 312 return MAX2((n_par_threads + 2) / 4, 1U); 313 } 314 315 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) : 316 _g1h(g1h), 317 _markBitMap1(), 318 _markBitMap2(), 319 _parallel_marking_threads(0), 320 _max_parallel_marking_threads(0), 321 _sleep_factor(0.0), 322 _marking_task_overhead(1.0), 323 _cleanup_list("Cleanup List"), 324 325 _prevMarkBitMap(&_markBitMap1), 326 _nextMarkBitMap(&_markBitMap2), 327 328 _global_mark_stack(), 329 // _finger set in set_non_marking_state 330 331 _max_worker_id(ParallelGCThreads), 332 // _active_tasks set in set_non_marking_state 333 // _tasks set inside the constructor 334 _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)), 335 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)), 336 337 _has_overflown(false), 338 _concurrent(false), 339 _has_aborted(false), 340 _restart_for_overflow(false), 341 _concurrent_marking_in_progress(false), 342 _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()), 343 _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()), 344 345 // _verbose_level set below 346 347 _init_times(), 348 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 349 _cleanup_times(), 350 _total_counting_time(0.0), 351 _total_rs_scrub_time(0.0), 352 353 _parallel_workers(NULL), 354 355 _completed_initialization(false) { 356 357 _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage); 358 _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage); 359 360 // Create & start a ConcurrentMark thread. 
361 _cmThread = new ConcurrentMarkThread(this); 362 assert(cmThread() != NULL, "CM Thread should have been created"); 363 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 364 if (_cmThread->osthread() == NULL) { 365 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread"); 366 } 367 368 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 369 370 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 371 satb_qs.set_buffer_size(G1SATBBufferSize); 372 373 _root_regions.init(_g1h->survivor(), this); 374 375 if (ConcGCThreads > ParallelGCThreads) { 376 log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).", 377 ConcGCThreads, ParallelGCThreads); 378 return; 379 } 380 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) { 381 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent 382 // if both are set 383 _sleep_factor = 0.0; 384 _marking_task_overhead = 1.0; 385 } else if (G1MarkingOverheadPercent > 0) { 386 // We will calculate the number of parallel marking threads based 387 // on a target overhead with respect to the soft real-time goal 388 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 389 double overall_cm_overhead = 390 (double) MaxGCPauseMillis * marking_overhead / 391 (double) GCPauseIntervalMillis; 392 double cpu_ratio = 1.0 / os::initial_active_processor_count(); 393 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 394 double marking_task_overhead = 395 overall_cm_overhead / marking_thread_num * os::initial_active_processor_count(); 396 double sleep_factor = 397 (1.0 - marking_task_overhead) / marking_task_overhead; 398 399 FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num); 400 _sleep_factor = sleep_factor; 401 _marking_task_overhead = marking_task_overhead; 402 } else { 403 // Calculate the number of parallel marking threads by scaling 404 // the number of parallel GC threads. 405 uint marking_thread_num = scale_parallel_threads(ParallelGCThreads); 406 FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num); 407 _sleep_factor = 0.0; 408 _marking_task_overhead = 1.0; 409 } 410 411 assert(ConcGCThreads > 0, "Should have been set"); 412 log_debug(gc)("ConcGCThreads: %u", ConcGCThreads); 413 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads); 414 _parallel_marking_threads = ConcGCThreads; 415 _max_parallel_marking_threads = _parallel_marking_threads; 416 417 _parallel_workers = new WorkGang("G1 Marker", 418 _max_parallel_marking_threads, false, true); 419 if (_parallel_workers == NULL) { 420 vm_exit_during_initialization("Failed necessary allocation."); 421 } else { 422 _parallel_workers->initialize_workers(); 423 } 424 425 if (FLAG_IS_DEFAULT(MarkStackSize)) { 426 size_t mark_stack_size = 427 MIN2(MarkStackSizeMax, 428 MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE))); 429 // Verify that the calculated value for MarkStackSize is in range. 430 // It would be nice to use the private utility routine from Arguments. 431 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { 432 log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): " 433 "must be between 1 and " SIZE_FORMAT, 434 mark_stack_size, MarkStackSizeMax); 435 return; 436 } 437 FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size); 438 } else { 439 // Verify MarkStackSize is in range. 
440 if (FLAG_IS_CMDLINE(MarkStackSize)) { 441 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { 442 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 443 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): " 444 "must be between 1 and " SIZE_FORMAT, 445 MarkStackSize, MarkStackSizeMax); 446 return; 447 } 448 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) { 449 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 450 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")" 451 " or for MarkStackSizeMax (" SIZE_FORMAT ")", 452 MarkStackSize, MarkStackSizeMax); 453 return; 454 } 455 } 456 } 457 } 458 459 if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) { 460 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack."); 461 } 462 463 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC); 464 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); 465 466 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 467 _active_tasks = _max_worker_id; 468 469 for (uint i = 0; i < _max_worker_id; ++i) { 470 G1CMTaskQueue* task_queue = new G1CMTaskQueue(); 471 task_queue->initialize(); 472 _task_queues->register_queue(i, task_queue); 473 474 _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues); 475 476 _accum_task_vtime[i] = 0.0; 477 } 478 479 // so that the call below can read a sensible value 480 _heap_start = g1h->reserved_region().start(); 481 set_non_marking_state(); 482 _completed_initialization = true; 483 } 484 485 void G1ConcurrentMark::reset() { 486 // Starting values for these two. This should be called in a STW 487 // phase. 488 MemRegion reserved = _g1h->g1_reserved(); 489 _heap_start = reserved.start(); 490 _heap_end = reserved.end(); 491 492 // Separated the asserts so that we know which one fires. 493 assert(_heap_start != NULL, "heap bounds should look ok"); 494 assert(_heap_end != NULL, "heap bounds should look ok"); 495 assert(_heap_start < _heap_end, "heap bounds should look ok"); 496 497 // Reset all the marking data structures and any necessary flags 498 reset_marking_state(); 499 500 // We do reset all of them, since different phases will use 501 // different number of active threads. So, it's easiest to have all 502 // of them ready. 503 for (uint i = 0; i < _max_worker_id; ++i) { 504 _tasks[i]->reset(_nextMarkBitMap); 505 } 506 507 // we need this to make sure that the flag is on during the evac 508 // pause with initial mark piggy-backed 509 set_concurrent_marking_in_progress(); 510 } 511 512 513 void G1ConcurrentMark::reset_marking_state() { 514 _global_mark_stack.set_empty(); 515 516 // Expand the marking stack, if we have to and if we can. 517 if (has_overflown()) { 518 _global_mark_stack.expand(); 519 } 520 521 clear_has_overflown(); 522 _finger = _heap_start; 523 524 for (uint i = 0; i < _max_worker_id; ++i) { 525 G1CMTaskQueue* queue = _task_queues->queue(i); 526 queue->set_empty(); 527 } 528 } 529 530 void G1ConcurrentMark::set_concurrency(uint active_tasks) { 531 assert(active_tasks <= _max_worker_id, "we should not have more"); 532 533 _active_tasks = active_tasks; 534 // Need to update the three data structures below according to the 535 // number of active threads for this phase. 
536 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 537 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 538 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 539 } 540 541 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) { 542 set_concurrency(active_tasks); 543 544 _concurrent = concurrent; 545 // We propagate this to all tasks, not just the active ones. 546 for (uint i = 0; i < _max_worker_id; ++i) 547 _tasks[i]->set_concurrent(concurrent); 548 549 if (concurrent) { 550 set_concurrent_marking_in_progress(); 551 } else { 552 // We currently assume that the concurrent flag has been set to 553 // false before we start remark. At this point we should also be 554 // in a STW phase. 555 assert(!concurrent_marking_in_progress(), "invariant"); 556 assert(out_of_regions(), 557 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT, 558 p2i(_finger), p2i(_heap_end)); 559 } 560 } 561 562 void G1ConcurrentMark::set_non_marking_state() { 563 // We set the global marking state to some default values when we're 564 // not doing marking. 565 reset_marking_state(); 566 _active_tasks = 0; 567 clear_concurrent_marking_in_progress(); 568 } 569 570 G1ConcurrentMark::~G1ConcurrentMark() { 571 // The G1ConcurrentMark instance is never freed. 572 ShouldNotReachHere(); 573 } 574 575 class G1ClearBitMapTask : public AbstractGangTask { 576 public: 577 static size_t chunk_size() { return M; } 578 579 private: 580 // Heap region closure used for clearing the given mark bitmap. 581 class G1ClearBitmapHRClosure : public HeapRegionClosure { 582 private: 583 G1CMBitMap* _bitmap; 584 G1ConcurrentMark* _cm; 585 public: 586 G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) { 587 } 588 589 virtual bool doHeapRegion(HeapRegion* r) { 590 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize; 591 592 HeapWord* cur = r->bottom(); 593 HeapWord* const end = r->end(); 594 595 while (cur < end) { 596 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end)); 597 _bitmap->clear_range(mr); 598 599 cur += chunk_size_in_words; 600 601 // Abort iteration if after yielding the marking has been aborted. 602 if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) { 603 return true; 604 } 605 // Repeat the asserts from before the start of the closure. We will do them 606 // as asserts here to minimize their overhead on the product. However, we 607 // will have them as guarantees at the beginning / end of the bitmap 608 // clearing to get some checking in the product. 609 assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant"); 610 assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant"); 611 } 612 assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index()); 613 614 return false; 615 } 616 }; 617 618 G1ClearBitmapHRClosure _cl; 619 HeapRegionClaimer _hr_claimer; 620 bool _suspendible; // If the task is suspendible, workers must join the STS. 621 622 public: 623 G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) : 624 AbstractGangTask("G1 Clear Bitmap"), 625 _cl(bitmap, suspendible ? 
cm : NULL), 626 _hr_claimer(n_workers), 627 _suspendible(suspendible) 628 { } 629 630 void work(uint worker_id) { 631 SuspendibleThreadSetJoiner sts_join(_suspendible); 632 G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer); 633 } 634 635 bool is_complete() { 636 return _cl.complete(); 637 } 638 }; 639 640 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) { 641 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint."); 642 643 size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor(); 644 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size(); 645 646 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers()); 647 648 G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield); 649 650 log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks); 651 workers->run_task(&cl, num_workers); 652 guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding."); 653 } 654 655 void G1ConcurrentMark::cleanup_for_next_mark() { 656 // Make sure that the concurrent mark thread looks to still be in 657 // the current cycle. 658 guarantee(cmThread()->during_cycle(), "invariant"); 659 660 // We are finishing up the current cycle by clearing the next 661 // marking bitmap and getting it ready for the next cycle. During 662 // this time no other cycle can start. So, let's make sure that this 663 // is the case. 664 guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant"); 665 666 clear_bitmap(_nextMarkBitMap, _parallel_workers, true); 667 668 // Clear the live count data. If the marking has been aborted, the abort() 669 // call already did that. 670 if (!has_aborted()) { 671 clear_live_data(_parallel_workers); 672 DEBUG_ONLY(verify_live_data_clear()); 673 } 674 675 // Repeat the asserts from above. 676 guarantee(cmThread()->during_cycle(), "invariant"); 677 guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant"); 678 } 679 680 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) { 681 assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint."); 682 clear_bitmap(_prevMarkBitMap, workers, false); 683 } 684 685 class CheckBitmapClearHRClosure : public HeapRegionClosure { 686 G1CMBitMap* _bitmap; 687 bool _error; 688 public: 689 CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) { 690 } 691 692 virtual bool doHeapRegion(HeapRegion* r) { 693 // This closure can be called concurrently to the mutator, so we must make sure 694 // that the result of the getNextMarkedWordAddress() call is compared to the 695 // value passed to it as limit to detect any found bits. 696 // end never changes in G1. 
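    // Any marked bit found in [bottom, end) means the bitmap is not clear for
    // this region; returning true aborts the iteration, so complete() is false.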
    HeapWord* end = r->end();
    return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void G1ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that a Full GC or an evacuation pause could occur while
 * it is suspended. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class G1CMConcurrentMarkingTask: public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      G1CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial */);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->do_yield_check();

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
                            ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~G1CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
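// All of max_parallel_marking_threads() is used when dynamic GC thread counts
// are disabled, or when ConcGCThreads was set explicitly (and
// ForceDynamicNumberOfGCThreads is off); otherwise AdaptiveSizePolicy chooses
// the count.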
881 uint G1ConcurrentMark::calc_parallel_marking_threads() { 882 uint n_conc_workers = 0; 883 if (!UseDynamicNumberOfGCThreads || 884 (!FLAG_IS_DEFAULT(ConcGCThreads) && 885 !ForceDynamicNumberOfGCThreads)) { 886 n_conc_workers = max_parallel_marking_threads(); 887 } else { 888 n_conc_workers = 889 AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(), 890 1, /* Minimum workers */ 891 parallel_marking_threads(), 892 Threads::number_of_non_daemon_threads()); 893 // Don't scale down "n_conc_workers" by scale_parallel_threads() because 894 // that scaling has already gone into "_max_parallel_marking_threads". 895 } 896 assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(), 897 "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u", 898 max_parallel_marking_threads(), n_conc_workers); 899 return n_conc_workers; 900 } 901 902 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) { 903 // Currently, only survivors can be root regions. 904 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 905 G1RootRegionScanClosure cl(_g1h, this); 906 907 const uintx interval = PrefetchScanIntervalInBytes; 908 HeapWord* curr = hr->bottom(); 909 const HeapWord* end = hr->top(); 910 while (curr < end) { 911 Prefetch::read(curr, interval); 912 oop obj = oop(curr); 913 int size = obj->oop_iterate_size(&cl); 914 assert(size == obj->size(), "sanity"); 915 curr += size; 916 } 917 } 918 919 class G1CMRootRegionScanTask : public AbstractGangTask { 920 private: 921 G1ConcurrentMark* _cm; 922 923 public: 924 G1CMRootRegionScanTask(G1ConcurrentMark* cm) : 925 AbstractGangTask("G1 Root Region Scan"), _cm(cm) { } 926 927 void work(uint worker_id) { 928 assert(Thread::current()->is_ConcurrentGC_thread(), 929 "this should only be done by a conc GC thread"); 930 931 G1CMRootRegions* root_regions = _cm->root_regions(); 932 HeapRegion* hr = root_regions->claim_next(); 933 while (hr != NULL) { 934 _cm->scanRootRegion(hr); 935 hr = root_regions->claim_next(); 936 } 937 } 938 }; 939 940 void G1ConcurrentMark::scan_root_regions() { 941 // scan_in_progress() will have been set to true only if there was 942 // at least one root region to scan. So, if it's false, we 943 // should not attempt to do any further work. 944 if (root_regions()->scan_in_progress()) { 945 assert(!has_aborted(), "Aborting before root region scanning is finished not supported."); 946 947 _parallel_marking_threads = MIN2(calc_parallel_marking_threads(), 948 // We distribute work on a per-region basis, so starting 949 // more threads than that is useless. 950 root_regions()->num_root_regions()); 951 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 952 "Maximum number of marking threads exceeded"); 953 954 G1CMRootRegionScanTask task(this); 955 log_debug(gc, ergo)("Running %s using %u workers for %u work units.", 956 task.name(), _parallel_marking_threads, root_regions()->num_root_regions()); 957 _parallel_workers->run_task(&task, _parallel_marking_threads); 958 959 // It's possible that has_aborted() is true here without actually 960 // aborting the survivor scan earlier. This is OK as it's 961 // mainly used for sanity checking. 
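    // scan_finished() verifies (unless the scan was aborted) that all survivor
    // root regions were claimed, and notifies any waiter on RootRegionScan_lock.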
962 root_regions()->scan_finished(); 963 } 964 } 965 966 void G1ConcurrentMark::concurrent_cycle_start() { 967 _gc_timer_cm->register_gc_start(); 968 969 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start()); 970 971 _g1h->trace_heap_before_gc(_gc_tracer_cm); 972 } 973 974 void G1ConcurrentMark::concurrent_cycle_end() { 975 _g1h->trace_heap_after_gc(_gc_tracer_cm); 976 977 if (has_aborted()) { 978 _gc_tracer_cm->report_concurrent_mode_failure(); 979 } 980 981 _gc_timer_cm->register_gc_end(); 982 983 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions()); 984 } 985 986 void G1ConcurrentMark::mark_from_roots() { 987 // we might be tempted to assert that: 988 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 989 // "inconsistent argument?"); 990 // However that wouldn't be right, because it's possible that 991 // a safepoint is indeed in progress as a younger generation 992 // stop-the-world GC happens even as we mark in this generation. 993 994 _restart_for_overflow = false; 995 996 // _g1h has _n_par_threads 997 _parallel_marking_threads = calc_parallel_marking_threads(); 998 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 999 "Maximum number of marking threads exceeded"); 1000 1001 uint active_workers = MAX2(1U, parallel_marking_threads()); 1002 assert(active_workers > 0, "Should have been set"); 1003 1004 // Setting active workers is not guaranteed since fewer 1005 // worker threads may currently exist and more may not be 1006 // available. 1007 active_workers = _parallel_workers->update_active_workers(active_workers); 1008 log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers()); 1009 1010 // Parallel task terminator is set in "set_concurrency_and_phase()" 1011 set_concurrency_and_phase(active_workers, true /* concurrent */); 1012 1013 G1CMConcurrentMarkingTask markingTask(this, cmThread()); 1014 _parallel_workers->run_task(&markingTask); 1015 print_stats(); 1016 } 1017 1018 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1019 // world is stopped at this checkpoint 1020 assert(SafepointSynchronize::is_at_safepoint(), 1021 "world should be stopped"); 1022 1023 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1024 1025 // If a full collection has happened, we shouldn't do this. 1026 if (has_aborted()) { 1027 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1028 return; 1029 } 1030 1031 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1032 1033 if (VerifyDuringGC) { 1034 HandleMark hm; // handle scope 1035 g1h->prepare_for_verify(); 1036 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1037 } 1038 g1h->verifier()->check_bitmaps("Remark Start"); 1039 1040 G1Policy* g1p = g1h->g1_policy(); 1041 g1p->record_concurrent_mark_remark_start(); 1042 1043 double start = os::elapsedTime(); 1044 1045 checkpointRootsFinalWork(); 1046 1047 double mark_work_end = os::elapsedTime(); 1048 1049 weakRefsWork(clear_all_soft_refs); 1050 1051 if (has_overflown()) { 1052 // We overflowed. Restart concurrent marking. 1053 _restart_for_overflow = true; 1054 1055 // Verify the heap w.r.t. the previous marking bitmap. 
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed.
    set_non_marking_state();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  _gc_tracer_cm->report_object_count_after_gc(&is_alive);
}

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const uint old_regions_removed() { return _old_regions_removed; }
  const uint humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
    AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup
List"); 1159 HRRSCleanupTask hrrs_cleanup_task; 1160 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1161 &hrrs_cleanup_task); 1162 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1163 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1164 1165 // Now update the lists 1166 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1167 { 1168 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1169 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1170 1171 // If we iterate over the global cleanup list at the end of 1172 // cleanup to do this printing we will not guarantee to only 1173 // generate output for the newly-reclaimed regions (the list 1174 // might not be empty at the beginning of cleanup; we might 1175 // still be working on its previous contents). So we do the 1176 // printing here, before we append the new regions to the global 1177 // cleanup list. 1178 1179 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1180 if (hr_printer->is_active()) { 1181 FreeRegionListIterator iter(&local_cleanup_list); 1182 while (iter.more_available()) { 1183 HeapRegion* hr = iter.get_next(); 1184 hr_printer->cleanup(hr); 1185 } 1186 } 1187 1188 _cleanup_list->add_ordered(&local_cleanup_list); 1189 assert(local_cleanup_list.is_empty(), "post-condition"); 1190 1191 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1192 } 1193 } 1194 }; 1195 1196 void G1ConcurrentMark::cleanup() { 1197 // world is stopped at this checkpoint 1198 assert(SafepointSynchronize::is_at_safepoint(), 1199 "world should be stopped"); 1200 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1201 1202 // If a full collection has happened, we shouldn't do this. 1203 if (has_aborted()) { 1204 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1205 return; 1206 } 1207 1208 g1h->verifier()->verify_region_sets_optional(); 1209 1210 if (VerifyDuringGC) { 1211 HandleMark hm; // handle scope 1212 g1h->prepare_for_verify(); 1213 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1214 } 1215 g1h->verifier()->check_bitmaps("Cleanup Start"); 1216 1217 G1Policy* g1p = g1h->g1_policy(); 1218 g1p->record_concurrent_mark_cleanup_start(); 1219 1220 double start = os::elapsedTime(); 1221 1222 HeapRegionRemSet::reset_for_cleanup_tasks(); 1223 1224 { 1225 GCTraceTime(Debug, gc)("Finalize Live Data"); 1226 finalize_live_data(); 1227 } 1228 1229 if (VerifyDuringGC) { 1230 GCTraceTime(Debug, gc)("Verify Live Data"); 1231 verify_live_data(); 1232 } 1233 1234 g1h->collector_state()->set_mark_in_progress(false); 1235 1236 double count_end = os::elapsedTime(); 1237 double this_final_counting_time = (count_end - start); 1238 _total_counting_time += this_final_counting_time; 1239 1240 if (log_is_enabled(Trace, gc, liveness)) { 1241 G1PrintRegionLivenessInfoClosure cl("Post-Marking"); 1242 _g1h->heap_region_iterate(&cl); 1243 } 1244 1245 // Install newly created mark bitMap as "prev". 1246 swapMarkBitMaps(); 1247 1248 g1h->reset_gc_time_stamp(); 1249 1250 uint n_workers = _g1h->workers()->active_workers(); 1251 1252 // Note end of marking in all heap regions. 1253 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); 1254 g1h->workers()->run_task(&g1_par_note_end_task); 1255 g1h->check_gc_time_stamps(); 1256 1257 if (!cleanup_list_is_empty()) { 1258 // The cleanup list is not empty, so we'll have to process it 1259 // concurrently. 
Notify anyone else that might be wanting free 1260 // regions that there will be more free regions coming soon. 1261 g1h->set_free_regions_coming(); 1262 } 1263 1264 // call below, since it affects the metric by which we sort the heap 1265 // regions. 1266 if (G1ScrubRemSets) { 1267 double rs_scrub_start = os::elapsedTime(); 1268 g1h->scrub_rem_set(); 1269 _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start); 1270 } 1271 1272 // this will also free any regions totally full of garbage objects, 1273 // and sort the regions. 1274 g1h->g1_policy()->record_concurrent_mark_cleanup_end(); 1275 1276 // Statistics. 1277 double end = os::elapsedTime(); 1278 _cleanup_times.add((end - start) * 1000.0); 1279 1280 // Clean up will have freed any regions completely full of garbage. 1281 // Update the soft reference policy with the new heap occupancy. 1282 Universe::update_heap_info_at_gc(); 1283 1284 if (VerifyDuringGC) { 1285 HandleMark hm; // handle scope 1286 g1h->prepare_for_verify(); 1287 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)"); 1288 } 1289 1290 g1h->verifier()->check_bitmaps("Cleanup End"); 1291 1292 g1h->verifier()->verify_region_sets_optional(); 1293 1294 // We need to make this be a "collection" so any collection pause that 1295 // races with it goes around and waits for completeCleanup to finish. 1296 g1h->increment_total_collections(); 1297 1298 // Clean out dead classes and update Metaspace sizes. 1299 if (ClassUnloadingWithConcurrentMark) { 1300 ClassLoaderDataGraph::purge(); 1301 } 1302 MetaspaceGC::compute_new_size(); 1303 1304 // We reclaimed old regions so we should calculate the sizes to make 1305 // sure we update the old gen/space data. 1306 g1h->g1mm()->update_sizes(); 1307 g1h->allocation_context_stats().update_after_mark(); 1308 } 1309 1310 void G1ConcurrentMark::complete_cleanup() { 1311 if (has_aborted()) return; 1312 1313 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1314 1315 _cleanup_list.verify_optional(); 1316 FreeRegionList tmp_free_list("Tmp Free List"); 1317 1318 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : " 1319 "cleanup list has %u entries", 1320 _cleanup_list.length()); 1321 1322 // No one else should be accessing the _cleanup_list at this point, 1323 // so it is not necessary to take any locks 1324 while (!_cleanup_list.is_empty()) { 1325 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 1326 assert(hr != NULL, "Got NULL from a non-empty list"); 1327 hr->par_clear(); 1328 tmp_free_list.add_ordered(hr); 1329 1330 // Instead of adding one region at a time to the secondary_free_list, 1331 // we accumulate them in the local list and move them a few at a 1332 // time. This also cuts down on the number of notify_all() calls 1333 // we do during this process. We'll also append the local list when 1334 // _cleanup_list is empty (which means we just removed the last 1335 // region from the _cleanup_list). 
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                      "appending %u entries to the secondary_free_list, "
                                      "cleanup list still has %u entries",
                                      tmp_free_list.length(),
                                      _cleanup_list.length());

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }
#ifndef PRODUCT
      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
#endif
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking.

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a worker thread (for serial reference
// processing the G1CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure: public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  WorkGang* _workers;
  uint _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          G1ConcurrentMark* cm,
                          WorkGang* workers,
                          uint n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
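  // Both overrides below reset the marking concurrency to _active_workers
  // before running a proxy task on _workers (see the definitions further down).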
1497 virtual void execute(ProcessTask& task); 1498 virtual void execute(EnqueueTask& task); 1499 }; 1500 1501 class G1CMRefProcTaskProxy: public AbstractGangTask { 1502 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1503 ProcessTask& _proc_task; 1504 G1CollectedHeap* _g1h; 1505 G1ConcurrentMark* _cm; 1506 1507 public: 1508 G1CMRefProcTaskProxy(ProcessTask& proc_task, 1509 G1CollectedHeap* g1h, 1510 G1ConcurrentMark* cm) : 1511 AbstractGangTask("Process reference objects in parallel"), 1512 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 1513 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1514 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 1515 } 1516 1517 virtual void work(uint worker_id) { 1518 ResourceMark rm; 1519 HandleMark hm; 1520 G1CMTask* task = _cm->task(worker_id); 1521 G1CMIsAliveClosure g1_is_alive(_g1h); 1522 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 1523 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 1524 1525 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1526 } 1527 }; 1528 1529 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 1530 assert(_workers != NULL, "Need parallel worker threads."); 1531 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1532 1533 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 1534 1535 // We need to reset the concurrency level before each 1536 // proxy task execution, so that the termination protocol 1537 // and overflow handling in G1CMTask::do_marking_step() knows 1538 // how many workers to wait for. 1539 _cm->set_concurrency(_active_workers); 1540 _workers->run_task(&proc_task_proxy); 1541 } 1542 1543 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 1544 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1545 EnqueueTask& _enq_task; 1546 1547 public: 1548 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 1549 AbstractGangTask("Enqueue reference objects in parallel"), 1550 _enq_task(enq_task) { } 1551 1552 virtual void work(uint worker_id) { 1553 _enq_task.work(worker_id); 1554 } 1555 }; 1556 1557 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 1558 assert(_workers != NULL, "Need parallel worker threads."); 1559 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1560 1561 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 1562 1563 // Not strictly necessary but... 1564 // 1565 // We need to reset the concurrency level before each 1566 // proxy task execution, so that the termination protocol 1567 // and overflow handling in G1CMTask::do_marking_step() knows 1568 // how many workers to wait for. 1569 _cm->set_concurrency(_active_workers); 1570 _workers->run_task(&enq_task_proxy); 1571 } 1572 1573 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 1574 if (has_overflown()) { 1575 // Skip processing the discovered references if we have 1576 // overflown the global marking stack. Reference objects 1577 // only get discovered once so it is OK to not 1578 // de-populate the discovered reference lists. We could have, 1579 // but the only benefit would be that, when marking restarts, 1580 // less reference objects are discovered. 1581 return; 1582 } 1583 1584 ResourceMark rm; 1585 HandleMark hm; 1586 1587 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1588 1589 // Is alive closure. 
1590 G1CMIsAliveClosure g1_is_alive(g1h); 1591 1592 // Inner scope to exclude the cleaning of the string and symbol 1593 // tables from the displayed time. 1594 { 1595 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm); 1596 1597 ReferenceProcessor* rp = g1h->ref_processor_cm(); 1598 1599 // See the comment in G1CollectedHeap::ref_processing_init() 1600 // about how reference processing currently works in G1. 1601 1602 // Set the soft reference policy 1603 rp->setup_policy(clear_all_soft_refs); 1604 assert(_global_mark_stack.is_empty(), "mark stack should be empty"); 1605 1606 // Instances of the 'Keep Alive' and 'Complete GC' closures used 1607 // in serial reference processing. Note these closures are also 1608 // used for serially processing (by the current thread) the 1609 // JNI references during parallel reference processing. 1610 // 1611 // These closures do not need to synchronize with the worker 1612 // threads involved in parallel reference processing as these 1613 // instances are executed serially by the current thread (i.e. 1614 // reference processing is not multi-threaded and is thus 1615 // performed by the current thread instead of a gang worker). 1616 // 1617 // The gang tasks involved in parallel reference processing create 1618 // their own instances of these closures, which do their own 1619 // synchronization among themselves. 1620 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 1621 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 1622 1623 // We need at least one active thread. If reference processing 1624 // is not multi-threaded we use the current (VMThread) thread, 1625 // otherwise we use the work gang from the G1CollectedHeap and 1626 // we utilize all the worker threads we can. 1627 bool processing_is_mt = rp->processing_is_mt(); 1628 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 1629 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 1630 1631 // Parallel processing task executor. 1632 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 1633 g1h->workers(), active_workers); 1634 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 1635 1636 // Set the concurrency level. The phase was already set prior to 1637 // executing the remark task. 1638 set_concurrency(active_workers); 1639 1640 // Set the degree of MT processing here. If the discovery was done MT, 1641 // the number of threads involved during discovery could differ from 1642 // the number of active workers. This is OK as long as the discovered 1643 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 1644 rp->set_active_mt_degree(active_workers); 1645 1646 // Process the weak references. 1647 const ReferenceProcessorStats& stats = 1648 rp->process_discovered_references(&g1_is_alive, 1649 &g1_keep_alive, 1650 &g1_drain_mark_stack, 1651 executor, 1652 _gc_timer_cm); 1653 _gc_tracer_cm->report_gc_reference_stats(stats); 1654 1655 // The do_oop work routines of the keep_alive and drain_marking_stack 1656 // oop closures will set the has_overflown flag if we overflow the 1657 // global marking stack.
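// If that flag gets set, the asserts below tolerate a non-empty mark stack and
// weakRefsWork() returns early after this scope (see the has_overflown() check
// further down); marking will then be redone (see G1CMRemarkTask::work()).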
1658 1659 assert(has_overflown() || _global_mark_stack.is_empty(), 1660 "Mark stack should be empty (unless it has overflown)"); 1661 1662 assert(rp->num_q() == active_workers, "why not"); 1663 1664 rp->enqueue_discovered_references(executor); 1665 1666 rp->verify_no_references_recorded(); 1667 assert(!rp->discovery_enabled(), "Post condition"); 1668 } 1669 1670 if (has_overflown()) { 1671 // We can not trust g1_is_alive if the marking stack overflowed 1672 return; 1673 } 1674 1675 assert(_global_mark_stack.is_empty(), "Marking should have completed"); 1676 1677 // Unload Klasses, String, Symbols, Code Cache, etc. 1678 if (ClassUnloadingWithConcurrentMark) { 1679 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm); 1680 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */); 1681 g1h->complete_cleaning(&g1_is_alive, purged_classes); 1682 } else { 1683 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm); 1684 // No need to clean string table and symbol table as they are treated as strong roots when 1685 // class unloading is disabled. 1686 g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled()); 1687 1688 } 1689 } 1690 1691 void G1ConcurrentMark::swapMarkBitMaps() { 1692 G1CMBitMap* temp = _prevMarkBitMap; 1693 _prevMarkBitMap = _nextMarkBitMap; 1694 _nextMarkBitMap = temp; 1695 } 1696 1697 // Closure for marking entries in SATB buffers. 1698 class G1CMSATBBufferClosure : public SATBBufferClosure { 1699 private: 1700 G1CMTask* _task; 1701 G1CollectedHeap* _g1h; 1702 1703 // This is very similar to G1CMTask::deal_with_reference, but with 1704 // more relaxed requirements for the argument, so this must be more 1705 // circumspect about treating the argument as an object. 1706 void do_entry(void* entry) const { 1707 _task->increment_refs_reached(); 1708 oop const obj = static_cast<oop>(entry); 1709 _task->make_reference_grey(obj); 1710 } 1711 1712 public: 1713 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h) 1714 : _task(task), _g1h(g1h) { } 1715 1716 virtual void do_buffer(void** buffer, size_t size) { 1717 for (size_t i = 0; i < size; ++i) { 1718 do_entry(buffer[i]); 1719 } 1720 } 1721 }; 1722 1723 class G1RemarkThreadsClosure : public ThreadClosure { 1724 G1CMSATBBufferClosure _cm_satb_cl; 1725 G1CMOopClosure _cm_cl; 1726 MarkingCodeBlobClosure _code_cl; 1727 int _thread_parity; 1728 1729 public: 1730 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) : 1731 _cm_satb_cl(task, g1h), 1732 _cm_cl(g1h, g1h->concurrent_mark(), task), 1733 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 1734 _thread_parity(Threads::thread_claim_parity()) {} 1735 1736 void do_thread(Thread* thread) { 1737 if (thread->is_Java_thread()) { 1738 if (thread->claim_oops_do(true, _thread_parity)) { 1739 JavaThread* jt = (JavaThread*)thread; 1740 1741 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking 1742 // however oops reachable from nmethods have very complex lifecycles: 1743 // * Alive if on the stack of an executing method 1744 // * Weakly reachable otherwise 1745 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 1746 // live by the SATB invariant but other oops recorded in nmethods may behave differently.
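// So, to be safe during remark, we walk all oops recorded in this thread's
// nmethods and treat them as marking roots here.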
1747 jt->nmethods_do(&_code_cl); 1748 1749 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 1750 } 1751 } else if (thread->is_VM_thread()) { 1752 if (thread->claim_oops_do(true, _thread_parity)) { 1753 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 1754 } 1755 } 1756 } 1757 }; 1758 1759 class G1CMRemarkTask: public AbstractGangTask { 1760 private: 1761 G1ConcurrentMark* _cm; 1762 public: 1763 void work(uint worker_id) { 1764 // Since all available tasks are actually started, we should 1765 // only proceed if we're supposed to be active. 1766 if (worker_id < _cm->active_tasks()) { 1767 G1CMTask* task = _cm->task(worker_id); 1768 task->record_start_time(); 1769 { 1770 ResourceMark rm; 1771 HandleMark hm; 1772 1773 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 1774 Threads::threads_do(&threads_f); 1775 } 1776 1777 do { 1778 task->do_marking_step(1000000000.0 /* something very large */, 1779 true /* do_termination */, 1780 false /* is_serial */); 1781 } while (task->has_aborted() && !_cm->has_overflown()); 1782 // If we overflow, then we do not want to restart. We instead 1783 // want to abort remark and do concurrent marking again. 1784 task->record_end_time(); 1785 } 1786 } 1787 1788 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) : 1789 AbstractGangTask("Par Remark"), _cm(cm) { 1790 _cm->terminator()->reset_for_reuse(active_workers); 1791 } 1792 }; 1793 1794 void G1ConcurrentMark::checkpointRootsFinalWork() { 1795 ResourceMark rm; 1796 HandleMark hm; 1797 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1798 1799 GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm); 1800 1801 g1h->ensure_parsability(false); 1802 1803 // this is remark, so we'll use up all active threads 1804 uint active_workers = g1h->workers()->active_workers(); 1805 set_concurrency_and_phase(active_workers, false /* concurrent */); 1806 // Leave _parallel_marking_threads at its 1807 // value originally calculated in the G1ConcurrentMark 1808 // constructor and pass values of the active workers 1809 // through the gang in the task. 1810 1811 { 1812 StrongRootsScope srs(active_workers); 1813 1814 G1CMRemarkTask remarkTask(this, active_workers); 1815 // We will start all available threads, even if we decide that the 1816 // active_workers will be fewer. The extra ones will just bail out 1817 // immediately. 1818 g1h->workers()->run_task(&remarkTask); 1819 } 1820 1821 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1822 guarantee(has_overflown() || 1823 satb_mq_set.completed_buffers_num() == 0, 1824 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT, 1825 BOOL_TO_STR(has_overflown()), 1826 satb_mq_set.completed_buffers_num()); 1827 1828 print_stats(); 1829 } 1830 1831 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 1832 _prevMarkBitMap->clear_range(mr); 1833 } 1834 1835 HeapRegion* 1836 G1ConcurrentMark::claim_region(uint worker_id) { 1837 // "checkpoint" the finger 1838 HeapWord* finger = _finger; 1839 1840 // _heap_end will not change underneath our feet; it only changes at 1841 // yield points. 1842 while (finger < _heap_end) { 1843 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 1844 1845 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 1846 // Make sure that the reads below do not float before loading curr_region. 1847 OrderAccess::loadload(); 1848 // Above heap_region_containing may return NULL as we always scan and claim 1849 // until the end of the heap.
In this case, just jump to the next region. 1850 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 1851 1852 // Is the gap between reading the finger and doing the CAS too long? 1853 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 1854 if (res == finger && curr_region != NULL) { 1855 // we succeeded 1856 HeapWord* bottom = curr_region->bottom(); 1857 HeapWord* limit = curr_region->next_top_at_mark_start(); 1858 1859 // notice that _finger == end cannot be guaranteed here since, 1860 // someone else might have moved the finger even further 1861 assert(_finger >= end, "the finger should have moved forward"); 1862 1863 if (limit > bottom) { 1864 return curr_region; 1865 } else { 1866 assert(limit == bottom, 1867 "the region limit should be at bottom"); 1868 // we return NULL and the caller should try calling 1869 // claim_region() again. 1870 return NULL; 1871 } 1872 } else { 1873 assert(_finger > finger, "the finger should have moved forward"); 1874 // read it again 1875 finger = _finger; 1876 } 1877 } 1878 1879 return NULL; 1880 } 1881 1882 #ifndef PRODUCT 1883 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 1884 private: 1885 G1CollectedHeap* _g1h; 1886 const char* _phase; 1887 int _info; 1888 1889 public: 1890 VerifyNoCSetOops(const char* phase, int info = -1) : 1891 _g1h(G1CollectedHeap::heap()), 1892 _phase(phase), 1893 _info(info) 1894 { } 1895 1896 void operator()(G1TaskQueueEntry task_entry) const { 1897 if (task_entry.is_array_slice()) { 1898 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1899 return; 1900 } 1901 guarantee(task_entry.obj()->is_oop(), 1902 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1903 p2i(task_entry.obj()), _phase, _info); 1904 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1905 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1906 p2i(task_entry.obj()), _phase, _info); 1907 } 1908 }; 1909 1910 void G1ConcurrentMark::verify_no_cset_oops() { 1911 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1912 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 1913 return; 1914 } 1915 1916 // Verify entries on the global mark stack 1917 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 1918 1919 // Verify entries on the task queues 1920 for (uint i = 0; i < _max_worker_id; ++i) { 1921 G1CMTaskQueue* queue = _task_queues->queue(i); 1922 queue->iterate(VerifyNoCSetOops("Queue", i)); 1923 } 1924 1925 // Verify the global finger 1926 HeapWord* global_finger = finger(); 1927 if (global_finger != NULL && global_finger < _heap_end) { 1928 // Since we always iterate over all regions, we might get a NULL HeapRegion 1929 // here. 1930 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 1931 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 1932 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 1933 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 1934 } 1935 1936 // Verify the task fingers 1937 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 1938 for (uint i = 0; i < parallel_marking_threads(); ++i) { 1939 G1CMTask* task = _tasks[i]; 1940 HeapWord* task_finger = task->finger(); 1941 if (task_finger != NULL && task_finger < _heap_end) { 1942 // See above note on the global finger verification. 
1943 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 1944 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 1945 !task_hr->in_collection_set(), 1946 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 1947 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 1948 } 1949 } 1950 } 1951 #endif // PRODUCT 1952 void G1ConcurrentMark::create_live_data() { 1953 _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap); 1954 } 1955 1956 void G1ConcurrentMark::finalize_live_data() { 1957 _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap); 1958 } 1959 1960 void G1ConcurrentMark::verify_live_data() { 1961 _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap); 1962 } 1963 1964 void G1ConcurrentMark::clear_live_data(WorkGang* workers) { 1965 _g1h->g1_rem_set()->clear_card_live_data(workers); 1966 } 1967 1968 #ifdef ASSERT 1969 void G1ConcurrentMark::verify_live_data_clear() { 1970 _g1h->g1_rem_set()->verify_card_live_data_is_clear(); 1971 } 1972 #endif 1973 1974 void G1ConcurrentMark::print_stats() { 1975 if (!log_is_enabled(Debug, gc, stats)) { 1976 return; 1977 } 1978 log_debug(gc, stats)("---------------------------------------------------------------------"); 1979 for (size_t i = 0; i < _active_tasks; ++i) { 1980 _tasks[i]->print_stats(); 1981 log_debug(gc, stats)("---------------------------------------------------------------------"); 1982 } 1983 } 1984 1985 void G1ConcurrentMark::abort() { 1986 if (!cmThread()->during_cycle() || _has_aborted) { 1987 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 1988 return; 1989 } 1990 1991 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 1992 // concurrent bitmap clearing. 1993 { 1994 GCTraceTime(Debug, gc)("Clear Next Bitmap"); 1995 clear_bitmap(_nextMarkBitMap, _g1h->workers(), false); 1996 } 1997 // Note we cannot clear the previous marking bitmap here 1998 // since VerifyDuringGC verifies the objects marked during 1999 // a full GC against the previous bitmap. 2000 2001 { 2002 GCTraceTime(Debug, gc)("Clear Live Data"); 2003 clear_live_data(_g1h->workers()); 2004 } 2005 DEBUG_ONLY({ 2006 GCTraceTime(Debug, gc)("Verify Live Data Clear"); 2007 verify_live_data_clear(); 2008 }) 2009 // Empty mark stack 2010 reset_marking_state(); 2011 for (uint i = 0; i < _max_worker_id; ++i) { 2012 _tasks[i]->clear_region_fields(); 2013 } 2014 _first_overflow_barrier_sync.abort(); 2015 _second_overflow_barrier_sync.abort(); 2016 _has_aborted = true; 2017 2018 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2019 satb_mq_set.abandon_partial_marking(); 2020 // This can be called either during or outside marking, we'll read 2021 // the expected_active value from the SATB queue set. 2022 satb_mq_set.set_active_all_threads( 2023 false, /* new active value */ 2024 satb_mq_set.is_active() /* expected_active */); 2025 } 2026 2027 static void print_ms_time_info(const char* prefix, const char* name, 2028 NumberSeq& ns) { 2029 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2030 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2031 if (ns.num() > 0) { 2032 log_trace(gc, marking)("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 2033 prefix, ns.sd(), ns.maximum()); 2034 } 2035 } 2036 2037 void G1ConcurrentMark::print_summary_info() { 2038 Log(gc, marking) log; 2039 if (!log.is_trace()) { 2040 return; 2041 } 2042 2043 log.trace(" Concurrent marking:"); 2044 print_ms_time_info(" ", "init marks", _init_times); 2045 print_ms_time_info(" ", "remarks", _remark_times); 2046 { 2047 print_ms_time_info(" ", "final marks", _remark_mark_times); 2048 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2049 2050 } 2051 print_ms_time_info(" ", "cleanups", _cleanup_times); 2052 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2053 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2054 if (G1ScrubRemSets) { 2055 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2056 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2057 } 2058 log.trace(" Total stop_world time = %8.2f s.", 2059 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2060 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2061 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2062 } 2063 2064 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2065 _parallel_workers->print_worker_threads_on(st); 2066 } 2067 2068 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2069 _parallel_workers->threads_do(tc); 2070 } 2071 2072 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2073 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2074 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2075 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2076 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2077 } 2078 2079 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) { 2080 assert(addr < _cm->finger(), "invariant"); 2081 assert(addr >= _task->finger(), "invariant"); 2082 2083 // We move that task's local finger along. 2084 _task->move_finger_to(addr); 2085 2086 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); 2087 // we only partially drain the local queue and global stack 2088 _task->drain_local_queue(true); 2089 _task->drain_global_stack(true); 2090 2091 // if the has_aborted flag has been raised, we need to bail out of 2092 // the iteration 2093 return !_task->has_aborted(); 2094 } 2095 2096 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2097 ReferenceProcessor* result = g1h->ref_processor_cm(); 2098 assert(result != NULL, "CM reference processor should not be NULL"); 2099 return result; 2100 } 2101 2102 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2103 G1ConcurrentMark* cm, 2104 G1CMTask* task) 2105 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2106 _g1h(g1h), _cm(cm), _task(task) 2107 { } 2108 2109 void G1CMTask::setup_for_region(HeapRegion* hr) { 2110 assert(hr != NULL, 2111 "claim_region() should have filtered out NULL regions"); 2112 _curr_region = hr; 2113 _finger = hr->bottom(); 2114 update_region_limit(); 2115 } 2116 2117 void G1CMTask::update_region_limit() { 2118 HeapRegion* hr = _curr_region; 2119 HeapWord* bottom = hr->bottom(); 2120 HeapWord* limit = hr->next_top_at_mark_start(); 2121 2122 if (limit == bottom) { 2123 // The region was collected underneath our feet. 
2124 // We set the finger to bottom to ensure that the bitmap 2125 // iteration that will follow this will not do anything. 2126 // (this is not a condition that holds when we set the region up, 2127 // as the region is not supposed to be empty in the first place) 2128 _finger = bottom; 2129 } else if (limit >= _region_limit) { 2130 assert(limit >= _finger, "peace of mind"); 2131 } else { 2132 assert(limit < _region_limit, "only way to get here"); 2133 // This can happen under some pretty unusual circumstances. An 2134 // evacuation pause empties the region underneath our feet (NTAMS 2135 // at bottom). We then do some allocation in the region (NTAMS 2136 // stays at bottom), followed by the region being used as a GC 2137 // alloc region (NTAMS will move to top() and the objects 2138 // originally below it will be grayed). All objects now marked in 2139 // the region are explicitly grayed, if below the global finger, 2140 // and we do not need in fact to scan anything else. So, we simply 2141 // set _finger to be limit to ensure that the bitmap iteration 2142 // doesn't do anything. 2143 _finger = limit; 2144 } 2145 2146 _region_limit = limit; 2147 } 2148 2149 void G1CMTask::giveup_current_region() { 2150 assert(_curr_region != NULL, "invariant"); 2151 clear_region_fields(); 2152 } 2153 2154 void G1CMTask::clear_region_fields() { 2155 // Values for these three fields that indicate that we're not 2156 // holding on to a region. 2157 _curr_region = NULL; 2158 _finger = NULL; 2159 _region_limit = NULL; 2160 } 2161 2162 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 2163 if (cm_oop_closure == NULL) { 2164 assert(_cm_oop_closure != NULL, "invariant"); 2165 } else { 2166 assert(_cm_oop_closure == NULL, "invariant"); 2167 } 2168 _cm_oop_closure = cm_oop_closure; 2169 } 2170 2171 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) { 2172 guarantee(nextMarkBitMap != NULL, "invariant"); 2173 _nextMarkBitMap = nextMarkBitMap; 2174 clear_region_fields(); 2175 2176 _calls = 0; 2177 _elapsed_time_ms = 0.0; 2178 _termination_time_ms = 0.0; 2179 _termination_start_time_ms = 0.0; 2180 } 2181 2182 bool G1CMTask::should_exit_termination() { 2183 regular_clock_call(); 2184 // This is called when we are in the termination protocol. We should 2185 // quit if, for some reason, this task wants to abort or the global 2186 // stack is not empty (this means that we can get work from it). 2187 return !_cm->mark_stack_empty() || has_aborted(); 2188 } 2189 2190 void G1CMTask::reached_limit() { 2191 assert(_words_scanned >= _words_scanned_limit || 2192 _refs_reached >= _refs_reached_limit , 2193 "shouldn't have been called otherwise"); 2194 regular_clock_call(); 2195 } 2196 2197 void G1CMTask::regular_clock_call() { 2198 if (has_aborted()) return; 2199 2200 // First, we need to recalculate the words scanned and refs reached 2201 // limits for the next clock call. 2202 recalculate_limits(); 2203 2204 // During the regular clock call we do the following 2205 2206 // (1) If an overflow has been flagged, then we abort. 2207 if (_cm->has_overflown()) { 2208 set_has_aborted(); 2209 return; 2210 } 2211 2212 // If we are not concurrent (i.e. we're doing remark) we don't need 2213 // to check anything else. The other steps are only needed during 2214 // the concurrent marking phase. 2215 if (!concurrent()) return; 2216 2217 // (2) If marking has been aborted for Full GC, then we also abort. 
2218 if (_cm->has_aborted()) { 2219 set_has_aborted(); 2220 return; 2221 } 2222 2223 double curr_time_ms = os::elapsedVTime() * 1000.0; 2224 2225 // (3) We check whether we should yield. If we have to, then we abort. 2226 if (SuspendibleThreadSet::should_yield()) { 2227 // We should yield. To do this we abort the task. The caller is 2228 // responsible for yielding. 2229 set_has_aborted(); 2230 return; 2231 } 2232 2233 // (4) We check whether we've reached our time quota. If we have, 2234 // then we abort. 2235 double elapsed_time_ms = curr_time_ms - _start_time_ms; 2236 if (elapsed_time_ms > _time_target_ms) { 2237 set_has_aborted(); 2238 _has_timed_out = true; 2239 return; 2240 } 2241 2242 // (5) Finally, we check whether there are enough completed SATB 2243 // buffers available for processing. If there are, we abort. 2244 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2245 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2246 // We do need to process SATB buffers, so we'll abort and restart 2247 // the marking task to do so. 2248 set_has_aborted(); 2249 return; 2250 } 2251 } 2252 2253 void G1CMTask::recalculate_limits() { 2254 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2255 _words_scanned_limit = _real_words_scanned_limit; 2256 2257 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2258 _refs_reached_limit = _real_refs_reached_limit; 2259 } 2260 2261 void G1CMTask::decrease_limits() { 2262 // This is called when we believe that we're going to do an infrequent 2263 // operation which will increase the per byte scanned cost (i.e. move 2264 // entries to/from the global stack). It basically tries to decrease the 2265 // scanning limit so that the clock is called earlier. 2266 2267 _words_scanned_limit = _real_words_scanned_limit - 2268 3 * words_scanned_period / 4; 2269 _refs_reached_limit = _real_refs_reached_limit - 2270 3 * refs_reached_period / 4; 2271 } 2272 2273 void G1CMTask::move_entries_to_global_stack() { 2274 // Local array where we'll store the entries that will be popped 2275 // from the local queue. 2276 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2277 2278 size_t n = 0; 2279 G1TaskQueueEntry task_entry; 2280 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) { 2281 buffer[n] = task_entry; 2282 ++n; 2283 } 2284 if (n < G1CMMarkStack::EntriesPerChunk) { 2285 buffer[n] = G1TaskQueueEntry(); 2286 } 2287 2288 if (n > 0) { 2289 if (!_cm->mark_stack_push(buffer)) { 2290 set_has_aborted(); 2291 } 2292 } 2293 2294 // This operation was quite expensive, so decrease the limits. 2295 decrease_limits(); 2296 } 2297 2298 bool G1CMTask::get_entries_from_global_stack() { 2299 // Local array where we'll store the entries that will be popped 2300 // from the global stack. 2301 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2302 2303 if (!_cm->mark_stack_pop(buffer)) { 2304 return false; 2305 } 2306 2307 // We did actually pop at least one entry. 2308 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) { 2309 G1TaskQueueEntry task_entry = buffer[i]; 2310 if (task_entry.is_null()) { 2311 break; 2312 } 2313 assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj())); 2314 bool success = _task_queue->push(task_entry); 2315 // We only call this when the local queue is empty or under a 2316 // given target limit. So, we do not expect this push to fail.
2317 assert(success, "invariant"); 2318 } 2319 2320 // This operation was quite expensive, so decrease the limits. 2321 decrease_limits(); 2322 return true; 2323 } 2324 2325 void G1CMTask::drain_local_queue(bool partially) { 2326 if (has_aborted()) { 2327 return; 2328 } 2329 2330 // Decide what the target size is, depending on whether we're going to 2331 // drain it partially (so that other tasks can steal if they run out 2332 // of things to do) or totally (at the very end). 2333 size_t target_size; 2334 if (partially) { 2335 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 2336 } else { 2337 target_size = 0; 2338 } 2339 2340 if (_task_queue->size() > target_size) { 2341 G1TaskQueueEntry entry; 2342 bool ret = _task_queue->pop_local(entry); 2343 while (ret) { 2344 scan_task_entry(entry); 2345 if (_task_queue->size() <= target_size || has_aborted()) { 2346 ret = false; 2347 } else { 2348 ret = _task_queue->pop_local(entry); 2349 } 2350 } 2351 } 2352 } 2353 2354 void G1CMTask::drain_global_stack(bool partially) { 2355 if (has_aborted()) return; 2356 2357 // We have a policy to drain the local queue before we attempt to 2358 // drain the global stack. 2359 assert(partially || _task_queue->size() == 0, "invariant"); 2360 2361 // Decide what the target size is, depending on whether we're going to 2362 // drain it partially (so that other tasks can steal if they run out 2363 // of things to do) or totally (at the very end). 2364 // Notice that when draining the global mark stack partially, due to the raciness 2365 // of the mark stack size update we might in fact drop below the target. But, 2366 // this is not a problem. 2367 // In case of total draining, we simply process until the global mark stack is 2368 // totally empty, disregarding the size counter. 2369 if (partially) { 2370 size_t const target_size = _cm->partial_mark_stack_size_target(); 2371 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 2372 if (get_entries_from_global_stack()) { 2373 drain_local_queue(partially); 2374 } 2375 } 2376 } else { 2377 while (!has_aborted() && get_entries_from_global_stack()) { 2378 drain_local_queue(partially); 2379 } 2380 } 2381 } 2382 2383 // SATB Queue has several assumptions on whether to call the par or 2384 // non-par versions of the methods. This is why some of the code is 2385 // replicated. We should really get rid of the single-threaded version 2386 // of the code to simplify things. 2387 void G1CMTask::drain_satb_buffers() { 2388 if (has_aborted()) return; 2389 2390 // We set this so that the regular clock knows that we're in the 2391 // middle of draining buffers and doesn't set the abort flag when it 2392 // notices that SATB buffers are available for draining. It'd be 2393 // very counterproductive if it did that. :-) 2394 _draining_satb_buffers = true; 2395 2396 G1CMSATBBufferClosure satb_cl(this, _g1h); 2397 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2398 2399 // This keeps claiming and applying the closure to completed buffers 2400 // until we run out of buffers or we need to abort.
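// regular_clock_call() is invoked after every buffer so that the remaining
// abort conditions (time target, yield requests, mark stack overflow) are
// still checked while we are draining.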
2401 while (!has_aborted() && 2402 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2403 regular_clock_call(); 2404 } 2405 2406 _draining_satb_buffers = false; 2407 2408 assert(has_aborted() || 2409 concurrent() || 2410 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2411 2412 // again, this was a potentially expensive operation, decrease the 2413 // limits to get the regular clock call early 2414 decrease_limits(); 2415 } 2416 2417 void G1CMTask::print_stats() { 2418 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 2419 _worker_id, _calls); 2420 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2421 _elapsed_time_ms, _termination_time_ms); 2422 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 2423 _step_times_ms.num(), _step_times_ms.avg(), 2424 _step_times_ms.sd()); 2425 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 2426 _step_times_ms.maximum(), _step_times_ms.sum()); 2427 } 2428 2429 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2430 return _task_queues->steal(worker_id, hash_seed, task_entry); 2431 } 2432 2433 /***************************************************************************** 2434 2435 The do_marking_step(time_target_ms, ...) method is the building 2436 block of the parallel marking framework. It can be called in parallel 2437 with other invocations of do_marking_step() on different tasks 2438 (but only one per task, obviously) and concurrently with the 2439 mutator threads, or during remark, hence it eliminates the need 2440 for two versions of the code. When called during remark, it will 2441 pick up from where the task left off during the concurrent marking 2442 phase. Interestingly, tasks are also claimable during evacuation 2443 pauses too, since do_marking_step() ensures that it aborts before 2444 it needs to yield. 2445 2446 The data structures that it uses to do marking work are the 2447 following: 2448 2449 (1) Marking Bitmap. If there are gray objects that appear only 2450 on the bitmap (this happens either when dealing with an overflow 2451 or when the initial marking phase has simply marked the roots 2452 and didn't push them on the stack), then tasks claim heap 2453 regions whose bitmap they then scan to find gray objects. A 2454 global finger indicates where the end of the last claimed region 2455 is. A local finger indicates how far into the region a task has 2456 scanned. The two fingers are used to determine how to gray an 2457 object (i.e. whether simply marking it is OK, as it will be 2458 visited by a task in the future, or whether it needs to be also 2459 pushed on a stack). 2460 2461 (2) Local Queue. The local queue of the task which is accessed 2462 reasonably efficiently by the task. Other tasks can steal from 2463 it when they run out of work. Throughout the marking phase, a 2464 task attempts to keep its local queue short but not totally 2465 empty, so that entries are available for stealing by other 2466 tasks. Only when there is no more work, a task will totally 2467 drain its local queue. 2468 2469 (3) Global Mark Stack. This handles local queue overflow. During 2470 marking only sets of entries are moved between it and the local 2471 queues, as access to it requires a mutex and more fine-grain 2472 interaction with it which might cause contention. If it 2473 overflows, then the marking phase should restart and iterate 2474 over the bitmap to identify gray objects. 
Throughout the marking 2475 phase, tasks attempt to keep the global mark stack at a small 2476 length but not totally empty, so that entries are available for 2477 popping by other tasks. Only when there is no more work do tasks 2478 totally drain the global mark stack. 2479 2480 (4) SATB Buffer Queue. This is where completed SATB buffers are 2481 made available. Buffers are regularly removed from this queue 2482 and scanned for roots, so that the queue doesn't get too 2483 long. During remark, all completed buffers are processed, as 2484 well as the filled-in parts of any uncompleted buffers. 2485 2486 The do_marking_step() method tries to abort when the time target 2487 has been reached. There are a few other cases when the 2488 do_marking_step() method also aborts: 2489 2490 (1) When the marking phase has been aborted (after a Full GC). 2491 2492 (2) When a global overflow (on the global stack) has been 2493 triggered. Before the task aborts, it will actually sync up with 2494 the other tasks to ensure that all the marking data structures 2495 (local queues, stacks, fingers etc.) are re-initialized so that 2496 when do_marking_step() completes, the marking phase can 2497 immediately restart. 2498 2499 (3) When enough completed SATB buffers are available. The 2500 do_marking_step() method only tries to drain SATB buffers right 2501 at the beginning. So, if enough buffers are available, the 2502 marking step aborts and the SATB buffers are processed at 2503 the beginning of the next invocation. 2504 2505 (4) To yield. When we have to yield, we abort and yield 2506 right at the end of do_marking_step(). This saves us from a lot 2507 of hassle as, by yielding, we might allow a Full GC. If this 2508 happens then objects will be compacted underneath our feet, the 2509 heap might shrink, etc. We save checking for this by just 2510 aborting and doing the yield right at the end. 2511 2512 From the above it follows that the do_marking_step() method should 2513 be called in a loop (or, otherwise, regularly) until it completes. 2514 2515 If a marking step completes without its has_aborted() flag being 2516 true, it means it has completed the current marking phase (and 2517 also all other marking tasks have done so and have all synced up). 2518 2519 A method called regular_clock_call() is invoked "regularly" (in 2520 sub-ms intervals) throughout marking. It is this clock method that 2521 checks all the abort conditions which were mentioned above and 2522 decides when the task should abort. A work-based scheme is used to 2523 trigger this clock method: when the number of object words the 2524 marking phase has scanned or the number of references the marking 2525 phase has visited reach a given limit. Additional invocations of 2526 the clock method have been planted in a few other strategic places 2527 too. The initial reason for the clock method was to avoid calling 2528 vtime too regularly, as it is quite expensive. So, once it was in 2529 place, it was natural to piggy-back all the other conditions on it 2530 too and not constantly check them throughout the code. 2531 2532 If do_termination is true then do_marking_step will enter its 2533 termination protocol. 2534 2535 The value of is_serial must be true when do_marking_step is being 2536 called serially (i.e. by the VMThread) and do_marking_step should 2537 skip any synchronization in the termination and overflow code. 2538 Examples include the serial remark code and the serial reference 2539 processing closures.
2540 2541 The value of is_serial must be false when do_marking_step is 2542 being called by any of the worker threads in a work gang. 2543 Examples include the concurrent marking code (CMMarkingTask), 2544 the MT remark code, and the MT reference processing closures. 2545 2546 *****************************************************************************/ 2547 2548 void G1CMTask::do_marking_step(double time_target_ms, 2549 bool do_termination, 2550 bool is_serial) { 2551 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2552 assert(concurrent() == _cm->concurrent(), "they should be the same"); 2553 2554 G1Policy* g1_policy = _g1h->g1_policy(); 2555 assert(_task_queues != NULL, "invariant"); 2556 assert(_task_queue != NULL, "invariant"); 2557 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant"); 2558 2559 assert(!_claimed, 2560 "only one thread should claim this task at any one time"); 2561 2562 // OK, this doesn't safeguard against all possible scenarios, as it is 2563 // possible for two threads to set the _claimed flag at the same 2564 // time. But it is only for debugging purposes anyway and it will 2565 // catch most problems. 2566 _claimed = true; 2567 2568 _start_time_ms = os::elapsedVTime() * 1000.0; 2569 2570 // If do_stealing is true then do_marking_step will attempt to 2571 // steal work from the other G1CMTasks. It only makes sense to 2572 // enable stealing when the termination protocol is enabled 2573 // and do_marking_step() is not being called serially. 2574 bool do_stealing = do_termination && !is_serial; 2575 2576 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2577 _time_target_ms = time_target_ms - diff_prediction_ms; 2578 2579 // set up the variables that are used in the work-based scheme to 2580 // call the regular clock method 2581 _words_scanned = 0; 2582 _refs_reached = 0; 2583 recalculate_limits(); 2584 2585 // clear all flags 2586 clear_has_aborted(); 2587 _has_timed_out = false; 2588 _draining_satb_buffers = false; 2589 2590 ++_calls; 2591 2592 // Set up the bitmap and oop closures. Anything that uses them is 2593 // eventually called from this method, so it is OK to allocate these 2594 // statically. 2595 G1CMBitMapClosure bitmap_closure(this, _cm); 2596 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 2597 set_cm_oop_closure(&cm_oop_closure); 2598 2599 if (_cm->has_overflown()) { 2600 // This can happen if the mark stack overflows during a GC pause 2601 // and this task, after a yield point, restarts. We have to abort 2602 // as we need to get into the overflow protocol which happens 2603 // right at the end of this task. 2604 set_has_aborted(); 2605 } 2606 2607 // First drain any available SATB buffers. After this, we will not 2608 // look at SATB buffers before the next invocation of this method. 2609 // If enough completed SATB buffers are queued up, the regular clock 2610 // will abort this task so that it restarts. 2611 drain_satb_buffers(); 2612 // ...then partially drain the local queue and the global stack 2613 drain_local_queue(true); 2614 drain_global_stack(true); 2615 2616 do { 2617 if (!has_aborted() && _curr_region != NULL) { 2618 // This means that we're already holding on to a region. 2619 assert(_finger != NULL, "if region is not NULL, then the finger " 2620 "should not be NULL either"); 2621 2622 // We might have restarted this task after an evacuation pause 2623 // which might have evacuated the region we're holding on to 2624 // underneath our feet.
Let's read its limit again to make sure 2625 // that we do not iterate over a region of the heap that 2626 // contains garbage (update_region_limit() will also move 2627 // _finger to the start of the region if it is found empty). 2628 update_region_limit(); 2629 // We will start from _finger not from the start of the region, 2630 // as we might be restarting this task after aborting half-way 2631 // through scanning this region. In this case, _finger points to 2632 // the address where we last found a marked object. If this is a 2633 // fresh region, _finger points to start(). 2634 MemRegion mr = MemRegion(_finger, _region_limit); 2635 2636 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2637 "humongous regions should go around loop once only"); 2638 2639 // Some special cases: 2640 // If the memory region is empty, we can just give up the region. 2641 // If the current region is humongous then we only need to check 2642 // the bitmap for the bit associated with the start of the object, 2643 // scan the object if it's live, and give up the region. 2644 // Otherwise, let's iterate over the bitmap of the part of the region 2645 // that is left. 2646 // If the iteration is successful, give up the region. 2647 if (mr.is_empty()) { 2648 giveup_current_region(); 2649 regular_clock_call(); 2650 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2651 if (_nextMarkBitMap->is_marked(mr.start())) { 2652 // The object is marked - apply the closure 2653 bitmap_closure.do_addr(mr.start()); 2654 } 2655 // Even if this task aborted while scanning the humongous object 2656 // we can (and should) give up the current region. 2657 giveup_current_region(); 2658 regular_clock_call(); 2659 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 2660 giveup_current_region(); 2661 regular_clock_call(); 2662 } else { 2663 assert(has_aborted(), "currently the only way to do so"); 2664 // The only way to abort the bitmap iteration is to return 2665 // false from the do_bit() method. However, inside the 2666 // do_bit() method we move the _finger to point to the 2667 // object currently being looked at. So, if we bail out, we 2668 // have definitely set _finger to something non-null. 2669 assert(_finger != NULL, "invariant"); 2670 2671 // Region iteration was actually aborted. So now _finger 2672 // points to the address of the object we last scanned. If we 2673 // leave it there, when we restart this task, we will rescan 2674 // the object. It is easy to avoid this. We move the finger by 2675 // enough to point to the next possible object header (the 2676 // bitmap knows by how much we need to move it as it knows its 2677 // granularity). 2678 assert(_finger < _region_limit, "invariant"); 2679 HeapWord* new_finger = _nextMarkBitMap->addr_after_obj(_finger); 2680 // Check if bitmap iteration was aborted while scanning the last object 2681 if (new_finger >= _region_limit) { 2682 giveup_current_region(); 2683 } else { 2684 move_finger_to(new_finger); 2685 } 2686 } 2687 } 2688 // At this point we have either completed iterating over the 2689 // region we were holding on to, or we have aborted. 2690 2691 // We then partially drain the local queue and the global stack. 2692 // (Do we really need this?) 
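// (Partial draining keeps the local queue and the global stack short, so that
// other tasks still find entries to steal or pop; see the big comment block
// above do_marking_step().)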
2693 drain_local_queue(true); 2694 drain_global_stack(true); 2695 2696 // Read the note on the claim_region() method on why it might 2697 // return NULL with potentially more regions available for 2698 // claiming and why we have to check out_of_regions() to determine 2699 // whether we're done or not. 2700 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2701 // We are going to try to claim a new region. We should have 2702 // given up on the previous one. 2703 // Separated the asserts so that we know which one fires. 2704 assert(_curr_region == NULL, "invariant"); 2705 assert(_finger == NULL, "invariant"); 2706 assert(_region_limit == NULL, "invariant"); 2707 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2708 if (claimed_region != NULL) { 2709 // Yes, we managed to claim one 2710 setup_for_region(claimed_region); 2711 assert(_curr_region == claimed_region, "invariant"); 2712 } 2713 // It is important to call the regular clock here. It might take 2714 // a while to claim a region if, for example, we hit a large 2715 // block of empty regions. So we need to call the regular clock 2716 // method once round the loop to make sure it's called 2717 // frequently enough. 2718 regular_clock_call(); 2719 } 2720 2721 if (!has_aborted() && _curr_region == NULL) { 2722 assert(_cm->out_of_regions(), 2723 "at this point we should be out of regions"); 2724 } 2725 } while ( _curr_region != NULL && !has_aborted()); 2726 2727 if (!has_aborted()) { 2728 // We cannot check whether the global stack is empty, since other 2729 // tasks might be pushing objects to it concurrently. 2730 assert(_cm->out_of_regions(), 2731 "at this point we should be out of regions"); 2732 // Try to reduce the number of available SATB buffers so that 2733 // remark has less work to do. 2734 drain_satb_buffers(); 2735 } 2736 2737 // Since we've done everything else, we can now totally drain the 2738 // local queue and global stack. 2739 drain_local_queue(false); 2740 drain_global_stack(false); 2741 2742 // Attempt at work stealing from other task's queues. 2743 if (do_stealing && !has_aborted()) { 2744 // We have not aborted. This means that we have finished all that 2745 // we could. Let's try to do some stealing... 2746 2747 // We cannot check whether the global stack is empty, since other 2748 // tasks might be pushing objects to it concurrently. 2749 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2750 "only way to reach here"); 2751 while (!has_aborted()) { 2752 G1TaskQueueEntry entry; 2753 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2754 scan_task_entry(entry); 2755 2756 // And since we're towards the end, let's totally drain the 2757 // local queue and global stack. 2758 drain_local_queue(false); 2759 drain_global_stack(false); 2760 } else { 2761 break; 2762 } 2763 } 2764 } 2765 2766 // We still haven't aborted. Now, let's try to get into the 2767 // termination protocol. 2768 if (do_termination && !has_aborted()) { 2769 // We cannot check whether the global stack is empty, since other 2770 // tasks might be concurrently pushing objects on it. 2771 // Separated the asserts so that we know which one fires. 
2772 assert(_cm->out_of_regions(), "only way to reach here"); 2773 assert(_task_queue->size() == 0, "only way to reach here"); 2774 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2775 2776 // The G1CMTask class also extends the TerminatorTerminator class, 2777 // hence its should_exit_termination() method will also decide 2778 // whether to exit the termination protocol or not. 2779 bool finished = (is_serial || 2780 _cm->terminator()->offer_termination(this)); 2781 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2782 _termination_time_ms += 2783 termination_end_time_ms - _termination_start_time_ms; 2784 2785 if (finished) { 2786 // We're all done. 2787 2788 if (_worker_id == 0) { 2789 // let's allow task 0 to do this 2790 if (concurrent()) { 2791 assert(_cm->concurrent_marking_in_progress(), "invariant"); 2792 // we need to set this to false before the next 2793 // safepoint. This way we ensure that the marking phase 2794 // doesn't observe any more heap expansions. 2795 _cm->clear_concurrent_marking_in_progress(); 2796 } 2797 } 2798 2799 // We can now guarantee that the global stack is empty, since 2800 // all other tasks have finished. We separated the guarantees so 2801 // that, if a condition is false, we can immediately find out 2802 // which one. 2803 guarantee(_cm->out_of_regions(), "only way to reach here"); 2804 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2805 guarantee(_task_queue->size() == 0, "only way to reach here"); 2806 guarantee(!_cm->has_overflown(), "only way to reach here"); 2807 } else { 2808 // Apparently there's more work to do. Let's abort this task. It 2809 // will restart it and we can hopefully find more things to do. 2810 set_has_aborted(); 2811 } 2812 } 2813 2814 // Mainly for debugging purposes to make sure that a pointer to the 2815 // closure which was statically allocated in this frame doesn't 2816 // escape it by accident. 2817 set_cm_oop_closure(NULL); 2818 double end_time_ms = os::elapsedVTime() * 1000.0; 2819 double elapsed_time_ms = end_time_ms - _start_time_ms; 2820 // Update the step history. 2821 _step_times_ms.add(elapsed_time_ms); 2822 2823 if (has_aborted()) { 2824 // The task was aborted for some reason. 2825 if (_has_timed_out) { 2826 double diff_ms = elapsed_time_ms - _time_target_ms; 2827 // Keep statistics of how well we did with respect to hitting 2828 // our target only if we actually timed out (if we aborted for 2829 // other reasons, then the results might get skewed). 2830 _marking_step_diffs_ms.add(diff_ms); 2831 } 2832 2833 if (_cm->has_overflown()) { 2834 // This is the interesting one. We aborted because a global 2835 // overflow was raised. This means we have to restart the 2836 // marking phase and start iterating over regions. However, in 2837 // order to do this we have to make sure that all tasks stop 2838 // what they are doing and re-initialize in a safe manner. We 2839 // will achieve this with the use of two barrier sync points. 2840 2841 if (!is_serial) { 2842 // We only need to enter the sync barrier if being called 2843 // from a parallel context 2844 _cm->enter_first_sync_barrier(_worker_id); 2845 2846 // When we exit this sync barrier we know that all tasks have 2847 // stopped doing marking work. So, it's now safe to 2848 // re-initialize our data structures. At the end of this method, 2849 // task 0 will clear the global data structures. 2850 } 2851 2852 // We clear the local state of this task... 
2853 clear_region_fields(); 2854 2855 if (!is_serial) { 2856 // ...and enter the second barrier. 2857 _cm->enter_second_sync_barrier(_worker_id); 2858 } 2859 // At this point, if we're during the concurrent phase of 2860 // marking, everything has been re-initialized and we're 2861 // ready to restart. 2862 } 2863 } 2864 2865 _claimed = false; 2866 } 2867 2868 G1CMTask::G1CMTask(uint worker_id, 2869 G1ConcurrentMark* cm, 2870 G1CMTaskQueue* task_queue, 2871 G1CMTaskQueueSet* task_queues) 2872 : _g1h(G1CollectedHeap::heap()), 2873 _worker_id(worker_id), _cm(cm), 2874 _objArray_processor(this), 2875 _claimed(false), 2876 _nextMarkBitMap(NULL), _hash_seed(17), 2877 _task_queue(task_queue), 2878 _task_queues(task_queues), 2879 _cm_oop_closure(NULL) { 2880 guarantee(task_queue != NULL, "invariant"); 2881 guarantee(task_queues != NULL, "invariant"); 2882 2883 _marking_step_diffs_ms.add(0.5); 2884 } 2885 2886 // These are formatting macros that are used below to ensure 2887 // consistent formatting. The *_H_* versions are used to format the 2888 // header for a particular value and they should be kept consistent 2889 // with the corresponding macro. Also note that most of the macros add 2890 // the necessary white space (as a prefix) which makes them a bit 2891 // easier to compose. 2892 2893 // All the output lines are prefixed with this string to be able to 2894 // identify them easily in a large log file. 2895 #define G1PPRL_LINE_PREFIX "###" 2896 2897 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 2898 #ifdef _LP64 2899 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 2900 #else // _LP64 2901 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 2902 #endif // _LP64 2903 2904 // For per-region info 2905 #define G1PPRL_TYPE_FORMAT " %-4s" 2906 #define G1PPRL_TYPE_H_FORMAT " %4s" 2907 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 2908 #define G1PPRL_BYTE_H_FORMAT " %9s" 2909 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 2910 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 2911 2912 // For summary info 2913 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 2914 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 2915 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 2916 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 2917 2918 G1PrintRegionLivenessInfoClosure:: 2919 G1PrintRegionLivenessInfoClosure(const char* phase_name) 2920 : _total_used_bytes(0), _total_capacity_bytes(0), 2921 _total_prev_live_bytes(0), _total_next_live_bytes(0), 2922 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 2923 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2924 MemRegion g1_reserved = g1h->g1_reserved(); 2925 double now = os::elapsedTime(); 2926 2927 // Print the header of the output. 
2928 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 2929 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 2930 G1PPRL_SUM_ADDR_FORMAT("reserved") 2931 G1PPRL_SUM_BYTE_FORMAT("region-size"), 2932 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 2933 HeapRegion::GrainBytes); 2934 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2935 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2936 G1PPRL_TYPE_H_FORMAT 2937 G1PPRL_ADDR_BASE_H_FORMAT 2938 G1PPRL_BYTE_H_FORMAT 2939 G1PPRL_BYTE_H_FORMAT 2940 G1PPRL_BYTE_H_FORMAT 2941 G1PPRL_DOUBLE_H_FORMAT 2942 G1PPRL_BYTE_H_FORMAT 2943 G1PPRL_BYTE_H_FORMAT, 2944 "type", "address-range", 2945 "used", "prev-live", "next-live", "gc-eff", 2946 "remset", "code-roots"); 2947 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2948 G1PPRL_TYPE_H_FORMAT 2949 G1PPRL_ADDR_BASE_H_FORMAT 2950 G1PPRL_BYTE_H_FORMAT 2951 G1PPRL_BYTE_H_FORMAT 2952 G1PPRL_BYTE_H_FORMAT 2953 G1PPRL_DOUBLE_H_FORMAT 2954 G1PPRL_BYTE_H_FORMAT 2955 G1PPRL_BYTE_H_FORMAT, 2956 "", "", 2957 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 2958 "(bytes)", "(bytes)"); 2959 } 2960 2961 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 2962 const char* type = r->get_type_str(); 2963 HeapWord* bottom = r->bottom(); 2964 HeapWord* end = r->end(); 2965 size_t capacity_bytes = r->capacity(); 2966 size_t used_bytes = r->used(); 2967 size_t prev_live_bytes = r->live_bytes(); 2968 size_t next_live_bytes = r->next_live_bytes(); 2969 double gc_eff = r->gc_efficiency(); 2970 size_t remset_bytes = r->rem_set()->mem_size(); 2971 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 2972 2973 _total_used_bytes += used_bytes; 2974 _total_capacity_bytes += capacity_bytes; 2975 _total_prev_live_bytes += prev_live_bytes; 2976 _total_next_live_bytes += next_live_bytes; 2977 _total_remset_bytes += remset_bytes; 2978 _total_strong_code_roots_bytes += strong_code_roots_bytes; 2979 2980 // Print a line for this particular region. 2981 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2982 G1PPRL_TYPE_FORMAT 2983 G1PPRL_ADDR_BASE_FORMAT 2984 G1PPRL_BYTE_FORMAT 2985 G1PPRL_BYTE_FORMAT 2986 G1PPRL_BYTE_FORMAT 2987 G1PPRL_DOUBLE_FORMAT 2988 G1PPRL_BYTE_FORMAT 2989 G1PPRL_BYTE_FORMAT, 2990 type, p2i(bottom), p2i(end), 2991 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 2992 remset_bytes, strong_code_roots_bytes); 2993 2994 return false; 2995 } 2996 2997 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 2998 // add static memory usages to remembered set sizes 2999 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3000 // Print the footer of the output. 3001 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3002 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3003 " SUMMARY" 3004 G1PPRL_SUM_MB_FORMAT("capacity") 3005 G1PPRL_SUM_MB_PERC_FORMAT("used") 3006 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3007 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3008 G1PPRL_SUM_MB_FORMAT("remset") 3009 G1PPRL_SUM_MB_FORMAT("code-roots"), 3010 bytes_to_mb(_total_capacity_bytes), 3011 bytes_to_mb(_total_used_bytes), 3012 perc(_total_used_bytes, _total_capacity_bytes), 3013 bytes_to_mb(_total_prev_live_bytes), 3014 perc(_total_prev_live_bytes, _total_capacity_bytes), 3015 bytes_to_mb(_total_next_live_bytes), 3016 perc(_total_next_live_bytes, _total_capacity_bytes), 3017 bytes_to_mb(_total_remset_bytes), 3018 bytes_to_mb(_total_strong_code_roots_bytes)); 3019 }