1 /* 2 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/metadataOnStackMark.hpp" 27 #include "classfile/symbolTable.hpp" 28 #include "code/codeCache.hpp" 29 #include "gc/g1/concurrentMarkThread.inline.hpp" 30 #include "gc/g1/g1CollectedHeap.inline.hpp" 31 #include "gc/g1/g1CollectorState.hpp" 32 #include "gc/g1/g1ConcurrentMark.inline.hpp" 33 #include "gc/g1/g1HeapVerifier.hpp" 34 #include "gc/g1/g1OopClosures.inline.hpp" 35 #include "gc/g1/g1CardLiveData.inline.hpp" 36 #include "gc/g1/g1Policy.hpp" 37 #include "gc/g1/g1StringDedup.hpp" 38 #include "gc/g1/heapRegion.inline.hpp" 39 #include "gc/g1/heapRegionRemSet.hpp" 40 #include "gc/g1/heapRegionSet.inline.hpp" 41 #include "gc/g1/suspendibleThreadSet.hpp" 42 #include "gc/shared/gcId.hpp" 43 #include "gc/shared/gcTimer.hpp" 44 #include "gc/shared/gcTrace.hpp" 45 #include "gc/shared/gcTraceTime.inline.hpp" 46 #include "gc/shared/genOopClosures.inline.hpp" 47 #include "gc/shared/referencePolicy.hpp" 48 #include "gc/shared/strongRootsScope.hpp" 49 #include "gc/shared/taskqueue.inline.hpp" 50 #include "gc/shared/vmGCOperations.hpp" 51 #include "logging/log.hpp" 52 #include "memory/allocation.hpp" 53 #include "memory/resourceArea.hpp" 54 #include "oops/oop.inline.hpp" 55 #include "runtime/atomic.hpp" 56 #include "runtime/handles.inline.hpp" 57 #include "runtime/java.hpp" 58 #include "runtime/prefetch.inline.hpp" 59 #include "services/memTracker.hpp" 60 #include "utilities/align.hpp" 61 #include "utilities/growableArray.hpp" 62 63 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) { 64 assert(addr < _cm->finger(), "invariant"); 65 assert(addr >= _task->finger(), "invariant"); 66 67 // We move that task's local finger along. 
68 _task->move_finger_to(addr); 69 70 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); 71 // we only partially drain the local queue and global stack 72 _task->drain_local_queue(true); 73 _task->drain_global_stack(true); 74 75 // if the has_aborted flag has been raised, we need to bail out of 76 // the iteration 77 return !_task->has_aborted(); 78 } 79 80 G1CMMarkStack::G1CMMarkStack() : 81 _max_chunk_capacity(0), 82 _base(NULL), 83 _chunk_capacity(0) { 84 set_empty(); 85 } 86 87 bool G1CMMarkStack::resize(size_t new_capacity) { 88 assert(is_empty(), "Only resize when stack is empty."); 89 assert(new_capacity <= _max_chunk_capacity, 90 "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity); 91 92 TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC); 93 94 if (new_base == NULL) { 95 log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk)); 96 return false; 97 } 98 // Release old mapping. 99 if (_base != NULL) { 100 MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity); 101 } 102 103 _base = new_base; 104 _chunk_capacity = new_capacity; 105 set_empty(); 106 107 return true; 108 } 109 110 size_t G1CMMarkStack::capacity_alignment() { 111 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry); 112 } 113 114 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) { 115 guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized."); 116 117 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry); 118 119 _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; 120 size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; 121 122 guarantee(initial_chunk_capacity <= _max_chunk_capacity, 123 "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT, 124 _max_chunk_capacity, 125 initial_chunk_capacity); 126 127 log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT, 128 initial_chunk_capacity, _max_chunk_capacity); 129 130 return resize(initial_chunk_capacity); 131 } 132 133 void G1CMMarkStack::expand() { 134 if (_chunk_capacity == _max_chunk_capacity) { 135 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity); 136 return; 137 } 138 size_t old_capacity = _chunk_capacity; 139 // Double capacity if possible 140 size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity); 141 142 if (resize(new_capacity)) { 143 log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 144 old_capacity, new_capacity); 145 } else { 146 log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 147 old_capacity, new_capacity); 148 } 149 } 150 151 G1CMMarkStack::~G1CMMarkStack() { 152 if (_base != NULL) { 153 MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity); 154 } 155 } 156 157 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) { 158 elem->next = *list; 159 *list = elem; 160 } 161 162 void 
G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) { 163 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 164 add_chunk_to_list(&_chunk_list, elem); 165 _chunks_in_chunk_list++; 166 } 167 168 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) { 169 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 170 add_chunk_to_list(&_free_list, elem); 171 } 172 173 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) { 174 TaskQueueEntryChunk* result = *list; 175 if (result != NULL) { 176 *list = (*list)->next; 177 } 178 return result; 179 } 180 181 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() { 182 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 183 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list); 184 if (result != NULL) { 185 _chunks_in_chunk_list--; 186 } 187 return result; 188 } 189 190 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() { 191 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 192 return remove_chunk_from_list(&_free_list); 193 } 194 195 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() { 196 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code. 197 // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding 198 // wraparound of _hwm. 199 if (_hwm >= _chunk_capacity) { 200 return NULL; 201 } 202 203 size_t cur_idx = Atomic::add(1u, &_hwm) - 1; 204 if (cur_idx >= _chunk_capacity) { 205 return NULL; 206 } 207 208 TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk; 209 result->next = NULL; 210 return result; 211 } 212 213 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) { 214 // Get a new chunk. 215 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list(); 216 217 if (new_chunk == NULL) { 218 // Did not get a chunk from the free list. Allocate from backing memory. 219 new_chunk = allocate_new_chunk(); 220 221 if (new_chunk == NULL) { 222 return false; 223 } 224 } 225 226 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry)); 227 228 add_chunk_to_chunk_list(new_chunk); 229 230 return true; 231 } 232 233 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) { 234 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list(); 235 236 if (cur == NULL) { 237 return false; 238 } 239 240 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry)); 241 242 add_chunk_to_free_list(cur); 243 return true; 244 } 245 246 void G1CMMarkStack::set_empty() { 247 _chunks_in_chunk_list = 0; 248 _hwm = 0; 249 _chunk_list = NULL; 250 _free_list = NULL; 251 } 252 253 G1CMRootRegions::G1CMRootRegions() : 254 _cm(NULL), _scan_in_progress(false), 255 _should_abort(false), _claimed_survivor_index(0) { } 256 257 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) { 258 _survivors = survivors; 259 _cm = cm; 260 } 261 262 void G1CMRootRegions::prepare_for_scan() { 263 assert(!scan_in_progress(), "pre-condition"); 264 265 // Currently, only survivors can be root regions. 
266 _claimed_survivor_index = 0; 267 _scan_in_progress = _survivors->regions()->is_nonempty(); 268 _should_abort = false; 269 } 270 271 HeapRegion* G1CMRootRegions::claim_next() { 272 if (_should_abort) { 273 // If someone has set the should_abort flag, we return NULL to 274 // force the caller to bail out of their loop. 275 return NULL; 276 } 277 278 // Currently, only survivors can be root regions. 279 const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions(); 280 281 int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1; 282 if (claimed_index < survivor_regions->length()) { 283 return survivor_regions->at(claimed_index); 284 } 285 return NULL; 286 } 287 288 uint G1CMRootRegions::num_root_regions() const { 289 return (uint)_survivors->regions()->length(); 290 } 291 292 void G1CMRootRegions::notify_scan_done() { 293 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 294 _scan_in_progress = false; 295 RootRegionScan_lock->notify_all(); 296 } 297 298 void G1CMRootRegions::cancel_scan() { 299 notify_scan_done(); 300 } 301 302 void G1CMRootRegions::scan_finished() { 303 assert(scan_in_progress(), "pre-condition"); 304 305 // Currently, only survivors can be root regions. 306 if (!_should_abort) { 307 assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index); 308 assert((uint)_claimed_survivor_index >= _survivors->length(), 309 "we should have claimed all survivors, claimed index = %u, length = %u", 310 (uint)_claimed_survivor_index, _survivors->length()); 311 } 312 313 notify_scan_done(); 314 } 315 316 bool G1CMRootRegions::wait_until_scan_finished() { 317 if (!scan_in_progress()) return false; 318 319 { 320 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 321 while (scan_in_progress()) { 322 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 323 } 324 } 325 return true; 326 } 327 328 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) { 329 return MAX2((n_par_threads + 2) / 4, 1U); 330 } 331 332 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) : 333 _g1h(g1h), 334 _markBitMap1(), 335 _markBitMap2(), 336 _parallel_marking_threads(0), 337 _max_parallel_marking_threads(0), 338 _sleep_factor(0.0), 339 _marking_task_overhead(1.0), 340 _cleanup_list("Cleanup List"), 341 342 _prevMarkBitMap(&_markBitMap1), 343 _nextMarkBitMap(&_markBitMap2), 344 345 _global_mark_stack(), 346 // _finger set in set_non_marking_state 347 348 _max_worker_id(ParallelGCThreads), 349 // _active_tasks set in set_non_marking_state 350 // _tasks set inside the constructor 351 _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)), 352 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)), 353 354 _has_overflown(false), 355 _concurrent(false), 356 _has_aborted(false), 357 _restart_for_overflow(false), 358 _concurrent_marking_in_progress(false), 359 _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()), 360 _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()), 361 362 // _verbose_level set below 363 364 _init_times(), 365 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 366 _cleanup_times(), 367 _total_counting_time(0.0), 368 _total_rs_scrub_time(0.0), 369 370 _parallel_workers(NULL), 371 372 _completed_initialization(false) { 373 374 _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage); 375 
_markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage); 376 377 // Create & start a ConcurrentMark thread. 378 _cmThread = new ConcurrentMarkThread(this); 379 assert(cmThread() != NULL, "CM Thread should have been created"); 380 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 381 if (_cmThread->osthread() == NULL) { 382 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread"); 383 } 384 385 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 386 387 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 388 satb_qs.set_buffer_size(G1SATBBufferSize); 389 390 _root_regions.init(_g1h->survivor(), this); 391 392 if (ConcGCThreads > ParallelGCThreads) { 393 log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).", 394 ConcGCThreads, ParallelGCThreads); 395 return; 396 } 397 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) { 398 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent 399 // if both are set 400 _sleep_factor = 0.0; 401 _marking_task_overhead = 1.0; 402 } else if (G1MarkingOverheadPercent > 0) { 403 // We will calculate the number of parallel marking threads based 404 // on a target overhead with respect to the soft real-time goal 405 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 406 double overall_cm_overhead = 407 (double) MaxGCPauseMillis * marking_overhead / 408 (double) GCPauseIntervalMillis; 409 double cpu_ratio = 1.0 / os::initial_active_processor_count(); 410 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 411 double marking_task_overhead = 412 overall_cm_overhead / marking_thread_num * os::initial_active_processor_count(); 413 double sleep_factor = 414 (1.0 - marking_task_overhead) / marking_task_overhead; 415 416 FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num); 417 _sleep_factor = sleep_factor; 418 _marking_task_overhead = marking_task_overhead; 419 } else { 420 // Calculate the number of parallel marking threads by scaling 421 // the number of parallel GC threads. 422 uint marking_thread_num = scale_parallel_threads(ParallelGCThreads); 423 FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num); 424 _sleep_factor = 0.0; 425 _marking_task_overhead = 1.0; 426 } 427 428 assert(ConcGCThreads > 0, "Should have been set"); 429 log_debug(gc)("ConcGCThreads: %u", ConcGCThreads); 430 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads); 431 _parallel_marking_threads = ConcGCThreads; 432 _max_parallel_marking_threads = _parallel_marking_threads; 433 434 _parallel_workers = new WorkGang("G1 Marker", 435 _max_parallel_marking_threads, false, true); 436 if (_parallel_workers == NULL) { 437 vm_exit_during_initialization("Failed necessary allocation."); 438 } else { 439 _parallel_workers->initialize_workers(); 440 } 441 442 if (FLAG_IS_DEFAULT(MarkStackSize)) { 443 size_t mark_stack_size = 444 MIN2(MarkStackSizeMax, 445 MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE))); 446 // Verify that the calculated value for MarkStackSize is in range. 447 // It would be nice to use the private utility routine from Arguments. 448 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { 449 log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): " 450 "must be between 1 and " SIZE_FORMAT, 451 mark_stack_size, MarkStackSizeMax); 452 return; 453 } 454 FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size); 455 } else { 456 // Verify MarkStackSize is in range. 
457 if (FLAG_IS_CMDLINE(MarkStackSize)) { 458 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { 459 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 460 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): " 461 "must be between 1 and " SIZE_FORMAT, 462 MarkStackSize, MarkStackSizeMax); 463 return; 464 } 465 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) { 466 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 467 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")" 468 " or for MarkStackSizeMax (" SIZE_FORMAT ")", 469 MarkStackSize, MarkStackSizeMax); 470 return; 471 } 472 } 473 } 474 } 475 476 if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) { 477 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack."); 478 } 479 480 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC); 481 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); 482 483 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 484 _active_tasks = _max_worker_id; 485 486 for (uint i = 0; i < _max_worker_id; ++i) { 487 G1CMTaskQueue* task_queue = new G1CMTaskQueue(); 488 task_queue->initialize(); 489 _task_queues->register_queue(i, task_queue); 490 491 _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues); 492 493 _accum_task_vtime[i] = 0.0; 494 } 495 496 // so that the call below can read a sensible value 497 _heap_start = g1h->reserved_region().start(); 498 set_non_marking_state(); 499 _completed_initialization = true; 500 } 501 502 void G1ConcurrentMark::reset() { 503 // Starting values for these two. This should be called in a STW 504 // phase. 505 MemRegion reserved = _g1h->g1_reserved(); 506 _heap_start = reserved.start(); 507 _heap_end = reserved.end(); 508 509 // Separated the asserts so that we know which one fires. 510 assert(_heap_start != NULL, "heap bounds should look ok"); 511 assert(_heap_end != NULL, "heap bounds should look ok"); 512 assert(_heap_start < _heap_end, "heap bounds should look ok"); 513 514 // Reset all the marking data structures and any necessary flags 515 reset_marking_state(); 516 517 // We do reset all of them, since different phases will use 518 // different number of active threads. So, it's easiest to have all 519 // of them ready. 520 for (uint i = 0; i < _max_worker_id; ++i) { 521 _tasks[i]->reset(_nextMarkBitMap); 522 } 523 524 // we need this to make sure that the flag is on during the evac 525 // pause with initial mark piggy-backed 526 set_concurrent_marking_in_progress(); 527 } 528 529 530 void G1ConcurrentMark::reset_marking_state() { 531 _global_mark_stack.set_empty(); 532 533 // Expand the marking stack, if we have to and if we can. 534 if (has_overflown()) { 535 _global_mark_stack.expand(); 536 } 537 538 clear_has_overflown(); 539 _finger = _heap_start; 540 541 for (uint i = 0; i < _max_worker_id; ++i) { 542 G1CMTaskQueue* queue = _task_queues->queue(i); 543 queue->set_empty(); 544 } 545 } 546 547 void G1ConcurrentMark::set_concurrency(uint active_tasks) { 548 assert(active_tasks <= _max_worker_id, "we should not have more"); 549 550 _active_tasks = active_tasks; 551 // Need to update the three data structures below according to the 552 // number of active threads for this phase. 
553 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 554 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 555 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 556 } 557 558 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) { 559 set_concurrency(active_tasks); 560 561 _concurrent = concurrent; 562 // We propagate this to all tasks, not just the active ones. 563 for (uint i = 0; i < _max_worker_id; ++i) 564 _tasks[i]->set_concurrent(concurrent); 565 566 if (concurrent) { 567 set_concurrent_marking_in_progress(); 568 } else { 569 // We currently assume that the concurrent flag has been set to 570 // false before we start remark. At this point we should also be 571 // in a STW phase. 572 assert(!concurrent_marking_in_progress(), "invariant"); 573 assert(out_of_regions(), 574 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT, 575 p2i(_finger), p2i(_heap_end)); 576 } 577 } 578 579 void G1ConcurrentMark::set_non_marking_state() { 580 // We set the global marking state to some default values when we're 581 // not doing marking. 582 reset_marking_state(); 583 _active_tasks = 0; 584 clear_concurrent_marking_in_progress(); 585 } 586 587 G1ConcurrentMark::~G1ConcurrentMark() { 588 // The G1ConcurrentMark instance is never freed. 589 ShouldNotReachHere(); 590 } 591 592 class G1ClearBitMapTask : public AbstractGangTask { 593 public: 594 static size_t chunk_size() { return M; } 595 596 private: 597 // Heap region closure used for clearing the given mark bitmap. 598 class G1ClearBitmapHRClosure : public HeapRegionClosure { 599 private: 600 G1CMBitMap* _bitmap; 601 G1ConcurrentMark* _cm; 602 public: 603 G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) { 604 } 605 606 virtual bool doHeapRegion(HeapRegion* r) { 607 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize; 608 609 HeapWord* cur = r->bottom(); 610 HeapWord* const end = r->end(); 611 612 while (cur < end) { 613 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end)); 614 _bitmap->clear_range(mr); 615 616 cur += chunk_size_in_words; 617 618 // Abort iteration if after yielding the marking has been aborted. 619 if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) { 620 return true; 621 } 622 // Repeat the asserts from before the start of the closure. We will do them 623 // as asserts here to minimize their overhead on the product. However, we 624 // will have them as guarantees at the beginning / end of the bitmap 625 // clearing to get some checking in the product. 626 assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant"); 627 assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant"); 628 } 629 assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index()); 630 631 return false; 632 } 633 }; 634 635 G1ClearBitmapHRClosure _cl; 636 HeapRegionClaimer _hr_claimer; 637 bool _suspendible; // If the task is suspendible, workers must join the STS. 638 639 public: 640 G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) : 641 AbstractGangTask("G1 Clear Bitmap"), 642 _cl(bitmap, suspendible ? 
cm : NULL), 643 _hr_claimer(n_workers), 644 _suspendible(suspendible) 645 { } 646 647 void work(uint worker_id) { 648 SuspendibleThreadSetJoiner sts_join(_suspendible); 649 G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer); 650 } 651 652 bool is_complete() { 653 return _cl.complete(); 654 } 655 }; 656 657 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) { 658 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint."); 659 660 size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor(); 661 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size(); 662 663 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers()); 664 665 G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield); 666 667 log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks); 668 workers->run_task(&cl, num_workers); 669 guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding."); 670 } 671 672 void G1ConcurrentMark::cleanup_for_next_mark() { 673 // Make sure that the concurrent mark thread looks to still be in 674 // the current cycle. 675 guarantee(cmThread()->during_cycle(), "invariant"); 676 677 // We are finishing up the current cycle by clearing the next 678 // marking bitmap and getting it ready for the next cycle. During 679 // this time no other cycle can start. So, let's make sure that this 680 // is the case. 681 guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant"); 682 683 clear_bitmap(_nextMarkBitMap, _parallel_workers, true); 684 685 // Clear the live count data. If the marking has been aborted, the abort() 686 // call already did that. 687 if (!has_aborted()) { 688 clear_live_data(_parallel_workers); 689 DEBUG_ONLY(verify_live_data_clear()); 690 } 691 692 // Repeat the asserts from above. 693 guarantee(cmThread()->during_cycle(), "invariant"); 694 guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant"); 695 } 696 697 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) { 698 assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint."); 699 clear_bitmap(_prevMarkBitMap, workers, false); 700 } 701 702 class CheckBitmapClearHRClosure : public HeapRegionClosure { 703 G1CMBitMap* _bitmap; 704 bool _error; 705 public: 706 CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) { 707 } 708 709 virtual bool doHeapRegion(HeapRegion* r) { 710 // This closure can be called concurrently to the mutator, so we must make sure 711 // that the result of the getNextMarkedWordAddress() call is compared to the 712 // value passed to it as limit to detect any found bits. 713 // end never changes in G1. 
714 HeapWord* end = r->end(); 715 return _bitmap->get_next_marked_addr(r->bottom(), end) != end; 716 } 717 }; 718 719 bool G1ConcurrentMark::nextMarkBitmapIsClear() { 720 CheckBitmapClearHRClosure cl(_nextMarkBitMap); 721 _g1h->heap_region_iterate(&cl); 722 return cl.complete(); 723 } 724 725 class NoteStartOfMarkHRClosure: public HeapRegionClosure { 726 public: 727 bool doHeapRegion(HeapRegion* r) { 728 r->note_start_of_marking(); 729 return false; 730 } 731 }; 732 733 void G1ConcurrentMark::checkpointRootsInitialPre() { 734 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 735 736 _has_aborted = false; 737 738 // Initialize marking structures. This has to be done in a STW phase. 739 reset(); 740 741 // For each region note start of marking. 742 NoteStartOfMarkHRClosure startcl; 743 g1h->heap_region_iterate(&startcl); 744 } 745 746 747 void G1ConcurrentMark::checkpointRootsInitialPost() { 748 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 749 750 // Start Concurrent Marking weak-reference discovery. 751 ReferenceProcessor* rp = g1h->ref_processor_cm(); 752 // enable ("weak") refs discovery 753 rp->enable_discovery(); 754 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 755 756 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 757 // This is the start of the marking cycle, we're expected all 758 // threads to have SATB queues with active set to false. 759 satb_mq_set.set_active_all_threads(true, /* new active value */ 760 false /* expected_active */); 761 762 _root_regions.prepare_for_scan(); 763 764 // update_g1_committed() will be called at the end of an evac pause 765 // when marking is on. So, it's also called at the end of the 766 // initial-mark pause to update the heap end, if the heap expands 767 // during it. No need to call it here. 768 } 769 770 /* 771 * Notice that in the next two methods, we actually leave the STS 772 * during the barrier sync and join it immediately afterwards. If we 773 * do not do this, the following deadlock can occur: one thread could 774 * be in the barrier sync code, waiting for the other thread to also 775 * sync up, whereas another one could be trying to yield, while also 776 * waiting for the other threads to sync up too. 777 * 778 * Note, however, that this code is also used during remark and in 779 * this case we should not attempt to leave / enter the STS, otherwise 780 * we'll either hit an assert (debug / fastdebug) or deadlock 781 * (product). So we should only leave / enter the STS if we are 782 * operating concurrently. 783 * 784 * Because the thread that does the sync barrier has left the STS, it 785 * is possible to be suspended for a Full GC or an evacuation pause 786 * could occur. This is actually safe, since the entering the sync 787 * barrier is one of the last things do_marking_step() does, and it 788 * doesn't manipulate any data structures afterwards. 789 */ 790 791 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 792 bool barrier_aborted; 793 { 794 SuspendibleThreadSetLeaver sts_leave(concurrent()); 795 barrier_aborted = !_first_overflow_barrier_sync.enter(); 796 } 797 798 // at this point everyone should have synced up and not be doing any 799 // more work 800 801 if (barrier_aborted) { 802 // If the barrier aborted we ignore the overflow condition and 803 // just abort the whole marking phase as quickly as possible. 
804 return; 805 } 806 807 // If we're executing the concurrent phase of marking, reset the marking 808 // state; otherwise the marking state is reset after reference processing, 809 // during the remark pause. 810 // If we reset here as a result of an overflow during the remark we will 811 // see assertion failures from any subsequent set_concurrency_and_phase() 812 // calls. 813 if (concurrent()) { 814 // let the task associated with with worker 0 do this 815 if (worker_id == 0) { 816 // task 0 is responsible for clearing the global data structures 817 // We should be here because of an overflow. During STW we should 818 // not clear the overflow flag since we rely on it being true when 819 // we exit this method to abort the pause and restart concurrent 820 // marking. 821 reset_marking_state(); 822 823 log_info(gc, marking)("Concurrent Mark reset for overflow"); 824 } 825 } 826 827 // after this, each task should reset its own data structures then 828 // then go into the second barrier 829 } 830 831 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) { 832 SuspendibleThreadSetLeaver sts_leave(concurrent()); 833 _second_overflow_barrier_sync.enter(); 834 835 // at this point everything should be re-initialized and ready to go 836 } 837 838 class G1CMConcurrentMarkingTask: public AbstractGangTask { 839 private: 840 G1ConcurrentMark* _cm; 841 ConcurrentMarkThread* _cmt; 842 843 public: 844 void work(uint worker_id) { 845 assert(Thread::current()->is_ConcurrentGC_thread(), 846 "this should only be done by a conc GC thread"); 847 ResourceMark rm; 848 849 double start_vtime = os::elapsedVTime(); 850 851 { 852 SuspendibleThreadSetJoiner sts_join; 853 854 assert(worker_id < _cm->active_tasks(), "invariant"); 855 G1CMTask* the_task = _cm->task(worker_id); 856 the_task->record_start_time(); 857 if (!_cm->has_aborted()) { 858 do { 859 double start_vtime_sec = os::elapsedVTime(); 860 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 861 862 the_task->do_marking_step(mark_step_duration_ms, 863 true /* do_termination */, 864 false /* is_serial*/); 865 866 double end_vtime_sec = os::elapsedVTime(); 867 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 868 _cm->do_yield_check(); 869 870 jlong sleep_time_ms; 871 if (!_cm->has_aborted() && the_task->has_aborted()) { 872 sleep_time_ms = 873 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 874 { 875 SuspendibleThreadSetLeaver sts_leave; 876 os::sleep(Thread::current(), sleep_time_ms, false); 877 } 878 } 879 } while (!_cm->has_aborted() && the_task->has_aborted()); 880 } 881 the_task->record_end_time(); 882 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 883 } 884 885 double end_vtime = os::elapsedVTime(); 886 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime); 887 } 888 889 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm, 890 ConcurrentMarkThread* cmt) : 891 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 892 893 ~G1CMConcurrentMarkingTask() { } 894 }; 895 896 // Calculates the number of active workers for a concurrent 897 // phase. 
898 uint G1ConcurrentMark::calc_parallel_marking_threads() { 899 uint n_conc_workers = 0; 900 if (!UseDynamicNumberOfGCThreads || 901 (!FLAG_IS_DEFAULT(ConcGCThreads) && 902 !ForceDynamicNumberOfGCThreads)) { 903 n_conc_workers = max_parallel_marking_threads(); 904 } else { 905 n_conc_workers = 906 AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(), 907 1, /* Minimum workers */ 908 parallel_marking_threads(), 909 Threads::number_of_non_daemon_threads()); 910 // Don't scale down "n_conc_workers" by scale_parallel_threads() because 911 // that scaling has already gone into "_max_parallel_marking_threads". 912 } 913 assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(), 914 "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u", 915 max_parallel_marking_threads(), n_conc_workers); 916 return n_conc_workers; 917 } 918 919 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) { 920 // Currently, only survivors can be root regions. 921 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 922 G1RootRegionScanClosure cl(_g1h, this); 923 924 const uintx interval = PrefetchScanIntervalInBytes; 925 HeapWord* curr = hr->bottom(); 926 const HeapWord* end = hr->top(); 927 while (curr < end) { 928 Prefetch::read(curr, interval); 929 oop obj = oop(curr); 930 int size = obj->oop_iterate_size(&cl); 931 assert(size == obj->size(), "sanity"); 932 curr += size; 933 } 934 } 935 936 class G1CMRootRegionScanTask : public AbstractGangTask { 937 private: 938 G1ConcurrentMark* _cm; 939 940 public: 941 G1CMRootRegionScanTask(G1ConcurrentMark* cm) : 942 AbstractGangTask("G1 Root Region Scan"), _cm(cm) { } 943 944 void work(uint worker_id) { 945 assert(Thread::current()->is_ConcurrentGC_thread(), 946 "this should only be done by a conc GC thread"); 947 948 G1CMRootRegions* root_regions = _cm->root_regions(); 949 HeapRegion* hr = root_regions->claim_next(); 950 while (hr != NULL) { 951 _cm->scanRootRegion(hr); 952 hr = root_regions->claim_next(); 953 } 954 } 955 }; 956 957 void G1ConcurrentMark::scan_root_regions() { 958 // scan_in_progress() will have been set to true only if there was 959 // at least one root region to scan. So, if it's false, we 960 // should not attempt to do any further work. 961 if (root_regions()->scan_in_progress()) { 962 assert(!has_aborted(), "Aborting before root region scanning is finished not supported."); 963 964 _parallel_marking_threads = MIN2(calc_parallel_marking_threads(), 965 // We distribute work on a per-region basis, so starting 966 // more threads than that is useless. 967 root_regions()->num_root_regions()); 968 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 969 "Maximum number of marking threads exceeded"); 970 971 G1CMRootRegionScanTask task(this); 972 log_debug(gc, ergo)("Running %s using %u workers for %u work units.", 973 task.name(), _parallel_marking_threads, root_regions()->num_root_regions()); 974 _parallel_workers->run_task(&task, _parallel_marking_threads); 975 976 // It's possible that has_aborted() is true here without actually 977 // aborting the survivor scan earlier. This is OK as it's 978 // mainly used for sanity checking. 
979 root_regions()->scan_finished(); 980 } 981 } 982 983 void G1ConcurrentMark::concurrent_cycle_start() { 984 _gc_timer_cm->register_gc_start(); 985 986 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start()); 987 988 _g1h->trace_heap_before_gc(_gc_tracer_cm); 989 } 990 991 void G1ConcurrentMark::concurrent_cycle_end() { 992 _g1h->trace_heap_after_gc(_gc_tracer_cm); 993 994 if (has_aborted()) { 995 _gc_tracer_cm->report_concurrent_mode_failure(); 996 } 997 998 _gc_timer_cm->register_gc_end(); 999 1000 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions()); 1001 } 1002 1003 void G1ConcurrentMark::mark_from_roots() { 1004 // we might be tempted to assert that: 1005 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1006 // "inconsistent argument?"); 1007 // However that wouldn't be right, because it's possible that 1008 // a safepoint is indeed in progress as a younger generation 1009 // stop-the-world GC happens even as we mark in this generation. 1010 1011 _restart_for_overflow = false; 1012 1013 // _g1h has _n_par_threads 1014 _parallel_marking_threads = calc_parallel_marking_threads(); 1015 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 1016 "Maximum number of marking threads exceeded"); 1017 1018 uint active_workers = MAX2(1U, parallel_marking_threads()); 1019 assert(active_workers > 0, "Should have been set"); 1020 1021 // Setting active workers is not guaranteed since fewer 1022 // worker threads may currently exist and more may not be 1023 // available. 1024 active_workers = _parallel_workers->update_active_workers(active_workers); 1025 log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers()); 1026 1027 // Parallel task terminator is set in "set_concurrency_and_phase()" 1028 set_concurrency_and_phase(active_workers, true /* concurrent */); 1029 1030 G1CMConcurrentMarkingTask markingTask(this, cmThread()); 1031 _parallel_workers->run_task(&markingTask); 1032 print_stats(); 1033 } 1034 1035 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1036 // world is stopped at this checkpoint 1037 assert(SafepointSynchronize::is_at_safepoint(), 1038 "world should be stopped"); 1039 1040 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1041 1042 // If a full collection has happened, we shouldn't do this. 1043 if (has_aborted()) { 1044 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1045 return; 1046 } 1047 1048 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1049 1050 if (VerifyDuringGC) { 1051 HandleMark hm; // handle scope 1052 g1h->prepare_for_verify(); 1053 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1054 } 1055 g1h->verifier()->check_bitmaps("Remark Start"); 1056 1057 G1Policy* g1p = g1h->g1_policy(); 1058 g1p->record_concurrent_mark_remark_start(); 1059 1060 double start = os::elapsedTime(); 1061 1062 checkpointRootsFinalWork(); 1063 1064 double mark_work_end = os::elapsedTime(); 1065 1066 weakRefsWork(clear_all_soft_refs); 1067 1068 if (has_overflown()) { 1069 // We overflowed. Restart concurrent marking. 1070 _restart_for_overflow = true; 1071 1072 // Verify the heap w.r.t. the previous marking bitmap. 
1073 if (VerifyDuringGC) { 1074 HandleMark hm; // handle scope 1075 g1h->prepare_for_verify(); 1076 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)"); 1077 } 1078 1079 // Clear the marking state because we will be restarting 1080 // marking due to overflowing the global mark stack. 1081 reset_marking_state(); 1082 } else { 1083 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1084 // We're done with marking. 1085 // This is the end of the marking cycle, we're expected all 1086 // threads to have SATB queues with active set to true. 1087 satb_mq_set.set_active_all_threads(false, /* new active value */ 1088 true /* expected_active */); 1089 1090 if (VerifyDuringGC) { 1091 HandleMark hm; // handle scope 1092 g1h->prepare_for_verify(); 1093 Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)"); 1094 } 1095 g1h->verifier()->check_bitmaps("Remark End"); 1096 assert(!restart_for_overflow(), "sanity"); 1097 // Completely reset the marking state since marking completed 1098 set_non_marking_state(); 1099 } 1100 1101 // Statistics 1102 double now = os::elapsedTime(); 1103 _remark_mark_times.add((mark_work_end - start) * 1000.0); 1104 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1105 _remark_times.add((now - start) * 1000.0); 1106 1107 g1p->record_concurrent_mark_remark_end(); 1108 1109 G1CMIsAliveClosure is_alive(g1h); 1110 _gc_tracer_cm->report_object_count_after_gc(&is_alive); 1111 } 1112 1113 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1114 G1CollectedHeap* _g1; 1115 size_t _freed_bytes; 1116 FreeRegionList* _local_cleanup_list; 1117 uint _old_regions_removed; 1118 uint _humongous_regions_removed; 1119 HRRSCleanupTask* _hrrs_cleanup_task; 1120 1121 public: 1122 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1123 FreeRegionList* local_cleanup_list, 1124 HRRSCleanupTask* hrrs_cleanup_task) : 1125 _g1(g1), 1126 _freed_bytes(0), 1127 _local_cleanup_list(local_cleanup_list), 1128 _old_regions_removed(0), 1129 _humongous_regions_removed(0), 1130 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1131 1132 size_t freed_bytes() { return _freed_bytes; } 1133 const uint old_regions_removed() { return _old_regions_removed; } 1134 const uint humongous_regions_removed() { return _humongous_regions_removed; } 1135 1136 bool doHeapRegion(HeapRegion *hr) { 1137 _g1->reset_gc_time_stamps(hr); 1138 hr->note_end_of_marking(); 1139 1140 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) { 1141 _freed_bytes += hr->used(); 1142 hr->set_containing_set(NULL); 1143 if (hr->is_humongous()) { 1144 _humongous_regions_removed++; 1145 _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */); 1146 } else { 1147 _old_regions_removed++; 1148 _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */); 1149 } 1150 } else { 1151 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1152 } 1153 1154 return false; 1155 } 1156 }; 1157 1158 class G1ParNoteEndTask: public AbstractGangTask { 1159 friend class G1NoteEndOfConcMarkClosure; 1160 1161 protected: 1162 G1CollectedHeap* _g1h; 1163 FreeRegionList* _cleanup_list; 1164 HeapRegionClaimer _hrclaimer; 1165 1166 public: 1167 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1168 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1169 } 1170 1171 void work(uint worker_id) { 1172 FreeRegionList local_cleanup_list("Local Cleanup List"); 1173 HRRSCleanupTask 
hrrs_cleanup_task; 1174 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1175 &hrrs_cleanup_task); 1176 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1177 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1178 1179 // Now update the lists 1180 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1181 { 1182 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1183 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1184 1185 // If we iterate over the global cleanup list at the end of 1186 // cleanup to do this printing we will not guarantee to only 1187 // generate output for the newly-reclaimed regions (the list 1188 // might not be empty at the beginning of cleanup; we might 1189 // still be working on its previous contents). So we do the 1190 // printing here, before we append the new regions to the global 1191 // cleanup list. 1192 1193 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1194 if (hr_printer->is_active()) { 1195 FreeRegionListIterator iter(&local_cleanup_list); 1196 while (iter.more_available()) { 1197 HeapRegion* hr = iter.get_next(); 1198 hr_printer->cleanup(hr); 1199 } 1200 } 1201 1202 _cleanup_list->add_ordered(&local_cleanup_list); 1203 assert(local_cleanup_list.is_empty(), "post-condition"); 1204 1205 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1206 } 1207 } 1208 }; 1209 1210 void G1ConcurrentMark::cleanup() { 1211 // world is stopped at this checkpoint 1212 assert(SafepointSynchronize::is_at_safepoint(), 1213 "world should be stopped"); 1214 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1215 1216 // If a full collection has happened, we shouldn't do this. 1217 if (has_aborted()) { 1218 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1219 return; 1220 } 1221 1222 g1h->verifier()->verify_region_sets_optional(); 1223 1224 if (VerifyDuringGC) { 1225 HandleMark hm; // handle scope 1226 g1h->prepare_for_verify(); 1227 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1228 } 1229 g1h->verifier()->check_bitmaps("Cleanup Start"); 1230 1231 G1Policy* g1p = g1h->g1_policy(); 1232 g1p->record_concurrent_mark_cleanup_start(); 1233 1234 double start = os::elapsedTime(); 1235 1236 HeapRegionRemSet::reset_for_cleanup_tasks(); 1237 1238 { 1239 GCTraceTime(Debug, gc)("Finalize Live Data"); 1240 finalize_live_data(); 1241 } 1242 1243 if (VerifyDuringGC) { 1244 GCTraceTime(Debug, gc)("Verify Live Data"); 1245 verify_live_data(); 1246 } 1247 1248 g1h->collector_state()->set_mark_in_progress(false); 1249 1250 double count_end = os::elapsedTime(); 1251 double this_final_counting_time = (count_end - start); 1252 _total_counting_time += this_final_counting_time; 1253 1254 if (log_is_enabled(Trace, gc, liveness)) { 1255 G1PrintRegionLivenessInfoClosure cl("Post-Marking"); 1256 _g1h->heap_region_iterate(&cl); 1257 } 1258 1259 // Install newly created mark bitMap as "prev". 1260 swapMarkBitMaps(); 1261 1262 g1h->reset_gc_time_stamp(); 1263 1264 uint n_workers = _g1h->workers()->active_workers(); 1265 1266 // Note end of marking in all heap regions. 1267 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); 1268 g1h->workers()->run_task(&g1_par_note_end_task); 1269 g1h->check_gc_time_stamps(); 1270 1271 if (!cleanup_list_is_empty()) { 1272 // The cleanup list is not empty, so we'll have to process it 1273 // concurrently. 
Notify anyone else that might be wanting free 1274 // regions that there will be more free regions coming soon. 1275 g1h->set_free_regions_coming(); 1276 } 1277 1278 // call below, since it affects the metric by which we sort the heap 1279 // regions. 1280 if (G1ScrubRemSets) { 1281 double rs_scrub_start = os::elapsedTime(); 1282 g1h->scrub_rem_set(); 1283 _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start); 1284 } 1285 1286 // this will also free any regions totally full of garbage objects, 1287 // and sort the regions. 1288 g1h->g1_policy()->record_concurrent_mark_cleanup_end(); 1289 1290 // Statistics. 1291 double end = os::elapsedTime(); 1292 _cleanup_times.add((end - start) * 1000.0); 1293 1294 // Clean up will have freed any regions completely full of garbage. 1295 // Update the soft reference policy with the new heap occupancy. 1296 Universe::update_heap_info_at_gc(); 1297 1298 if (VerifyDuringGC) { 1299 HandleMark hm; // handle scope 1300 g1h->prepare_for_verify(); 1301 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)"); 1302 } 1303 1304 g1h->verifier()->check_bitmaps("Cleanup End"); 1305 1306 g1h->verifier()->verify_region_sets_optional(); 1307 1308 // We need to make this be a "collection" so any collection pause that 1309 // races with it goes around and waits for completeCleanup to finish. 1310 g1h->increment_total_collections(); 1311 1312 // Clean out dead classes and update Metaspace sizes. 1313 if (ClassUnloadingWithConcurrentMark) { 1314 ClassLoaderDataGraph::purge(); 1315 } 1316 MetaspaceGC::compute_new_size(); 1317 1318 // We reclaimed old regions so we should calculate the sizes to make 1319 // sure we update the old gen/space data. 1320 g1h->g1mm()->update_sizes(); 1321 g1h->allocation_context_stats().update_after_mark(); 1322 } 1323 1324 void G1ConcurrentMark::complete_cleanup() { 1325 if (has_aborted()) return; 1326 1327 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1328 1329 _cleanup_list.verify_optional(); 1330 FreeRegionList tmp_free_list("Tmp Free List"); 1331 1332 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : " 1333 "cleanup list has %u entries", 1334 _cleanup_list.length()); 1335 1336 // No one else should be accessing the _cleanup_list at this point, 1337 // so it is not necessary to take any locks 1338 while (!_cleanup_list.is_empty()) { 1339 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 1340 assert(hr != NULL, "Got NULL from a non-empty list"); 1341 hr->par_clear(); 1342 tmp_free_list.add_ordered(hr); 1343 1344 // Instead of adding one region at a time to the secondary_free_list, 1345 // we accumulate them in the local list and move them a few at a 1346 // time. This also cuts down on the number of notify_all() calls 1347 // we do during this process. We'll also append the local list when 1348 // _cleanup_list is empty (which means we just removed the last 1349 // region from the _cleanup_list). 
1350 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 1351 _cleanup_list.is_empty()) { 1352 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : " 1353 "appending %u entries to the secondary_free_list, " 1354 "cleanup list still has %u entries", 1355 tmp_free_list.length(), 1356 _cleanup_list.length()); 1357 1358 { 1359 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 1360 g1h->secondary_free_list_add(&tmp_free_list); 1361 SecondaryFreeList_lock->notify_all(); 1362 } 1363 #ifndef PRODUCT 1364 if (G1StressConcRegionFreeing) { 1365 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 1366 os::sleep(Thread::current(), (jlong) 1, false); 1367 } 1368 } 1369 #endif 1370 } 1371 } 1372 assert(tmp_free_list.is_empty(), "post-condition"); 1373 } 1374 1375 // Supporting Object and Oop closures for reference discovery 1376 // and processing in during marking 1377 1378 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1379 HeapWord* addr = (HeapWord*)obj; 1380 return addr != NULL && 1381 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 1382 } 1383 1384 // 'Keep Alive' oop closure used by both serial parallel reference processing. 1385 // Uses the G1CMTask associated with a worker thread (for serial reference 1386 // processing the G1CMTask for worker 0 is used) to preserve (mark) and 1387 // trace referent objects. 1388 // 1389 // Using the G1CMTask and embedded local queues avoids having the worker 1390 // threads operating on the global mark stack. This reduces the risk 1391 // of overflowing the stack - which we would rather avoid at this late 1392 // state. Also using the tasks' local queues removes the potential 1393 // of the workers interfering with each other that could occur if 1394 // operating on the global stack. 1395 1396 class G1CMKeepAliveAndDrainClosure: public OopClosure { 1397 G1ConcurrentMark* _cm; 1398 G1CMTask* _task; 1399 int _ref_counter_limit; 1400 int _ref_counter; 1401 bool _is_serial; 1402 public: 1403 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) : 1404 _cm(cm), _task(task), _is_serial(is_serial), 1405 _ref_counter_limit(G1RefProcDrainInterval) { 1406 assert(_ref_counter_limit > 0, "sanity"); 1407 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1408 _ref_counter = _ref_counter_limit; 1409 } 1410 1411 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1412 virtual void do_oop( oop* p) { do_oop_work(p); } 1413 1414 template <class T> void do_oop_work(T* p) { 1415 if (!_cm->has_overflown()) { 1416 oop obj = oopDesc::load_decode_heap_oop(p); 1417 _task->deal_with_reference(obj); 1418 _ref_counter--; 1419 1420 if (_ref_counter == 0) { 1421 // We have dealt with _ref_counter_limit references, pushing them 1422 // and objects reachable from them on to the local stack (and 1423 // possibly the global stack). Call G1CMTask::do_marking_step() to 1424 // process these entries. 1425 // 1426 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if 1427 // there's nothing more to do (i.e. we're done with the entries that 1428 // were pushed as a result of the G1CMTask::deal_with_reference() calls 1429 // above) or we overflow. 1430 // 1431 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted() 1432 // flag while there may still be some work to do. (See the comment at 1433 // the beginning of G1CMTask::do_marking_step() for those conditions - 1434 // one of which is reaching the specified time target.) 
It is only 1435 // when G1CMTask::do_marking_step() returns without setting the 1436 // has_aborted() flag that the marking step has completed. 1437 do { 1438 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1439 _task->do_marking_step(mark_step_duration_ms, 1440 false /* do_termination */, 1441 _is_serial); 1442 } while (_task->has_aborted() && !_cm->has_overflown()); 1443 _ref_counter = _ref_counter_limit; 1444 } 1445 } 1446 } 1447 }; 1448 1449 // 'Drain' oop closure used by both serial and parallel reference processing. 1450 // Uses the G1CMTask associated with a given worker thread (for serial 1451 // reference processing the G1CMtask for worker 0 is used). Calls the 1452 // do_marking_step routine, with an unbelievably large timeout value, 1453 // to drain the marking data structures of the remaining entries 1454 // added by the 'keep alive' oop closure above. 1455 1456 class G1CMDrainMarkingStackClosure: public VoidClosure { 1457 G1ConcurrentMark* _cm; 1458 G1CMTask* _task; 1459 bool _is_serial; 1460 public: 1461 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) : 1462 _cm(cm), _task(task), _is_serial(is_serial) { 1463 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1464 } 1465 1466 void do_void() { 1467 do { 1468 // We call G1CMTask::do_marking_step() to completely drain the local 1469 // and global marking stacks of entries pushed by the 'keep alive' 1470 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 1471 // 1472 // G1CMTask::do_marking_step() is called in a loop, which we'll exit 1473 // if there's nothing more to do (i.e. we've completely drained the 1474 // entries that were pushed as a a result of applying the 'keep alive' 1475 // closure to the entries on the discovered ref lists) or we overflow 1476 // the global marking stack. 1477 // 1478 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted() 1479 // flag while there may still be some work to do. (See the comment at 1480 // the beginning of G1CMTask::do_marking_step() for those conditions - 1481 // one of which is reaching the specified time target.) It is only 1482 // when G1CMTask::do_marking_step() returns without setting the 1483 // has_aborted() flag that the marking step has completed. 1484 1485 _task->do_marking_step(1000000000.0 /* something very large */, 1486 true /* do_termination */, 1487 _is_serial); 1488 } while (_task->has_aborted() && !_cm->has_overflown()); 1489 } 1490 }; 1491 1492 // Implementation of AbstractRefProcTaskExecutor for parallel 1493 // reference processing at the end of G1 concurrent marking 1494 1495 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 1496 private: 1497 G1CollectedHeap* _g1h; 1498 G1ConcurrentMark* _cm; 1499 WorkGang* _workers; 1500 uint _active_workers; 1501 1502 public: 1503 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 1504 G1ConcurrentMark* cm, 1505 WorkGang* workers, 1506 uint n_workers) : 1507 _g1h(g1h), _cm(cm), 1508 _workers(workers), _active_workers(n_workers) { } 1509 1510 // Executes the given task using concurrent marking worker threads. 
1511 virtual void execute(ProcessTask& task); 1512 virtual void execute(EnqueueTask& task); 1513 }; 1514 1515 class G1CMRefProcTaskProxy: public AbstractGangTask { 1516 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1517 ProcessTask& _proc_task; 1518 G1CollectedHeap* _g1h; 1519 G1ConcurrentMark* _cm; 1520 1521 public: 1522 G1CMRefProcTaskProxy(ProcessTask& proc_task, 1523 G1CollectedHeap* g1h, 1524 G1ConcurrentMark* cm) : 1525 AbstractGangTask("Process reference objects in parallel"), 1526 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 1527 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1528 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 1529 } 1530 1531 virtual void work(uint worker_id) { 1532 ResourceMark rm; 1533 HandleMark hm; 1534 G1CMTask* task = _cm->task(worker_id); 1535 G1CMIsAliveClosure g1_is_alive(_g1h); 1536 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 1537 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 1538 1539 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1540 } 1541 }; 1542 1543 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 1544 assert(_workers != NULL, "Need parallel worker threads."); 1545 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1546 1547 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 1548 1549 // We need to reset the concurrency level before each 1550 // proxy task execution, so that the termination protocol 1551 // and overflow handling in G1CMTask::do_marking_step() knows 1552 // how many workers to wait for. 1553 _cm->set_concurrency(_active_workers); 1554 _workers->run_task(&proc_task_proxy); 1555 } 1556 1557 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 1558 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1559 EnqueueTask& _enq_task; 1560 1561 public: 1562 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 1563 AbstractGangTask("Enqueue reference objects in parallel"), 1564 _enq_task(enq_task) { } 1565 1566 virtual void work(uint worker_id) { 1567 _enq_task.work(worker_id); 1568 } 1569 }; 1570 1571 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 1572 assert(_workers != NULL, "Need parallel worker threads."); 1573 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1574 1575 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 1576 1577 // Not strictly necessary but... 1578 // 1579 // We need to reset the concurrency level before each 1580 // proxy task execution, so that the termination protocol 1581 // and overflow handling in G1CMTask::do_marking_step() knows 1582 // how many workers to wait for. 1583 _cm->set_concurrency(_active_workers); 1584 _workers->run_task(&enq_task_proxy); 1585 } 1586 1587 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 1588 if (has_overflown()) { 1589 // Skip processing the discovered references if we have 1590 // overflown the global marking stack. Reference objects 1591 // only get discovered once so it is OK to not 1592 // de-populate the discovered reference lists. We could have, 1593 // but the only benefit would be that, when marking restarts, 1594 // less reference objects are discovered. 1595 return; 1596 } 1597 1598 ResourceMark rm; 1599 HandleMark hm; 1600 1601 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1602 1603 // Is alive closure. 
1604 G1CMIsAliveClosure g1_is_alive(g1h);
1605
1606 // Inner scope to exclude the cleaning of the string and symbol
1607 // tables from the displayed time.
1608 {
1609 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1610
1611 ReferenceProcessor* rp = g1h->ref_processor_cm();
1612
1613 // See the comment in G1CollectedHeap::ref_processing_init()
1614 // about how reference processing currently works in G1.
1615
1616 // Set the soft reference policy
1617 rp->setup_policy(clear_all_soft_refs);
1618 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1619
1620 // Instances of the 'Keep Alive' and 'Complete GC' closures used
1621 // in serial reference processing. Note these closures are also
1622 // used for serially processing (by the current thread) the
1623 // JNI references during parallel reference processing.
1624 //
1625 // These closures do not need to synchronize with the worker
1626 // threads involved in parallel reference processing as these
1627 // instances are executed serially by the current thread (e.g.
1628 // reference processing is not multi-threaded and is thus
1629 // performed by the current thread instead of a gang worker).
1630 //
1631 // The gang tasks involved in parallel reference processing create
1632 // their own instances of these closures, which do their own
1633 // synchronization among themselves.
1634 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1635 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1636
1637 // We need at least one active thread. If reference processing
1638 // is not multi-threaded we use the current (VMThread) thread,
1639 // otherwise we use the work gang from the G1CollectedHeap and
1640 // we utilize all the worker threads we can.
1641 bool processing_is_mt = rp->processing_is_mt();
1642 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
1643 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
1644
1645 // Parallel processing task executor.
1646 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
1647 g1h->workers(), active_workers);
1648 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1649
1650 // Set the concurrency level. The phase was already set prior to
1651 // executing the remark task.
1652 set_concurrency(active_workers);
1653
1654 // Set the degree of MT processing here. If the discovery was done MT,
1655 // the number of threads involved during discovery could differ from
1656 // the number of active workers. This is OK as long as the discovered
1657 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1658 rp->set_active_mt_degree(active_workers);
1659
1660 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1661
1662 // Process the weak references.
1663 const ReferenceProcessorStats& stats =
1664 rp->process_discovered_references(&g1_is_alive,
1665 &g1_keep_alive,
1666 &g1_drain_mark_stack,
1667 executor,
1668 &pt);
1669 _gc_tracer_cm->report_gc_reference_stats(stats);
1670 pt.print_all_references();
1671
1672 // The do_oop work routines of the keep_alive and drain_marking_stack
1673 // oop closures will set the has_overflown flag if we overflow the
1674 // global marking stack.
1675
1676 assert(has_overflown() || _global_mark_stack.is_empty(),
1677 "Mark stack should be empty (unless it has overflown)");
1678
1679 assert(rp->num_q() == active_workers, "why not");
1680
1681 rp->enqueue_discovered_references(executor, &pt);
1682
1683 rp->verify_no_references_recorded();
1684
1685 pt.print_enqueue_phase();
1686
1687 assert(!rp->discovery_enabled(), "Post condition");
1688 }
1689
1690 if (has_overflown()) {
1691 // We cannot trust g1_is_alive if the marking stack overflowed
1692 return;
1693 }
1694
1695 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1696
1697 // Unload Klasses, String, Symbols, Code Cache, etc.
1698 if (ClassUnloadingWithConcurrentMark) {
1699 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1700 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1701 g1h->complete_cleaning(&g1_is_alive, purged_classes);
1702 } else {
1703 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1704 // No need to clean string table and symbol table as they are treated as strong roots when
1705 // class unloading is disabled.
1706 g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1707
1708 }
1709 }
1710
1711 void G1ConcurrentMark::swapMarkBitMaps() {
1712 G1CMBitMap* temp = _prevMarkBitMap;
1713 _prevMarkBitMap = _nextMarkBitMap;
1714 _nextMarkBitMap = temp;
1715 }
1716
1717 // Closure for marking entries in SATB buffers.
1718 class G1CMSATBBufferClosure : public SATBBufferClosure {
1719 private:
1720 G1CMTask* _task;
1721 G1CollectedHeap* _g1h;
1722
1723 // This is very similar to G1CMTask::deal_with_reference, but with
1724 // more relaxed requirements for the argument, so this must be more
1725 // circumspect about treating the argument as an object.
1726 void do_entry(void* entry) const {
1727 _task->increment_refs_reached();
1728 oop const obj = static_cast<oop>(entry);
1729 _task->make_reference_grey(obj);
1730 }
1731
1732 public:
1733 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1734 : _task(task), _g1h(g1h) { }
1735
1736 virtual void do_buffer(void** buffer, size_t size) {
1737 for (size_t i = 0; i < size; ++i) {
1738 do_entry(buffer[i]);
1739 }
1740 }
1741 };
1742
1743 class G1RemarkThreadsClosure : public ThreadClosure {
1744 G1CMSATBBufferClosure _cm_satb_cl;
1745 G1CMOopClosure _cm_cl;
1746 MarkingCodeBlobClosure _code_cl;
1747 int _thread_parity;
1748
1749 public:
1750 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1751 _cm_satb_cl(task, g1h),
1752 _cm_cl(g1h, g1h->concurrent_mark(), task),
1753 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1754 _thread_parity(Threads::thread_claim_parity()) {}
1755
1756 void do_thread(Thread* thread) {
1757 if (thread->is_Java_thread()) {
1758 if (thread->claim_oops_do(true, _thread_parity)) {
1759 JavaThread* jt = (JavaThread*)thread;
1760
1761 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1762 // however, the oops reachable from nmethods have very complex lifecycles:
1763 // * Alive if on the stack of an executing method
1764 // * Weakly reachable otherwise
1765 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1766 // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1767 jt->nmethods_do(&_code_cl); 1768 1769 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 1770 } 1771 } else if (thread->is_VM_thread()) { 1772 if (thread->claim_oops_do(true, _thread_parity)) { 1773 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 1774 } 1775 } 1776 } 1777 }; 1778 1779 class G1CMRemarkTask: public AbstractGangTask { 1780 private: 1781 G1ConcurrentMark* _cm; 1782 public: 1783 void work(uint worker_id) { 1784 // Since all available tasks are actually started, we should 1785 // only proceed if we're supposed to be active. 1786 if (worker_id < _cm->active_tasks()) { 1787 G1CMTask* task = _cm->task(worker_id); 1788 task->record_start_time(); 1789 { 1790 ResourceMark rm; 1791 HandleMark hm; 1792 1793 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 1794 Threads::threads_do(&threads_f); 1795 } 1796 1797 do { 1798 task->do_marking_step(1000000000.0 /* something very large */, 1799 true /* do_termination */, 1800 false /* is_serial */); 1801 } while (task->has_aborted() && !_cm->has_overflown()); 1802 // If we overflow, then we do not want to restart. We instead 1803 // want to abort remark and do concurrent marking again. 1804 task->record_end_time(); 1805 } 1806 } 1807 1808 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) : 1809 AbstractGangTask("Par Remark"), _cm(cm) { 1810 _cm->terminator()->reset_for_reuse(active_workers); 1811 } 1812 }; 1813 1814 void G1ConcurrentMark::checkpointRootsFinalWork() { 1815 ResourceMark rm; 1816 HandleMark hm; 1817 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1818 1819 GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm); 1820 1821 g1h->ensure_parsability(false); 1822 1823 // this is remark, so we'll use up all active threads 1824 uint active_workers = g1h->workers()->active_workers(); 1825 set_concurrency_and_phase(active_workers, false /* concurrent */); 1826 // Leave _parallel_marking_threads at it's 1827 // value originally calculated in the G1ConcurrentMark 1828 // constructor and pass values of the active workers 1829 // through the gang in the task. 1830 1831 { 1832 StrongRootsScope srs(active_workers); 1833 1834 G1CMRemarkTask remarkTask(this, active_workers); 1835 // We will start all available threads, even if we decide that the 1836 // active_workers will be fewer. The extra ones will just bail out 1837 // immediately. 1838 g1h->workers()->run_task(&remarkTask); 1839 } 1840 1841 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1842 guarantee(has_overflown() || 1843 satb_mq_set.completed_buffers_num() == 0, 1844 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT, 1845 BOOL_TO_STR(has_overflown()), 1846 satb_mq_set.completed_buffers_num()); 1847 1848 print_stats(); 1849 } 1850 1851 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 1852 _prevMarkBitMap->clear_range(mr); 1853 } 1854 1855 HeapRegion* 1856 G1ConcurrentMark::claim_region(uint worker_id) { 1857 // "checkpoint" the finger 1858 HeapWord* finger = _finger; 1859 1860 // _heap_end will not change underneath our feet; it only changes at 1861 // yield points. 1862 while (finger < _heap_end) { 1863 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 1864 1865 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 1866 // Make sure that the reads below do not float before loading curr_region. 1867 OrderAccess::loadload(); 1868 // Above heap_region_containing may return NULL as we always scan claim 1869 // until the end of the heap. 
In this case, just jump to the next region. 1870 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 1871 1872 // Is the gap between reading the finger and doing the CAS too long? 1873 HeapWord* res = Atomic::cmpxchg(end, &_finger, finger); 1874 if (res == finger && curr_region != NULL) { 1875 // we succeeded 1876 HeapWord* bottom = curr_region->bottom(); 1877 HeapWord* limit = curr_region->next_top_at_mark_start(); 1878 1879 // notice that _finger == end cannot be guaranteed here since, 1880 // someone else might have moved the finger even further 1881 assert(_finger >= end, "the finger should have moved forward"); 1882 1883 if (limit > bottom) { 1884 return curr_region; 1885 } else { 1886 assert(limit == bottom, 1887 "the region limit should be at bottom"); 1888 // we return NULL and the caller should try calling 1889 // claim_region() again. 1890 return NULL; 1891 } 1892 } else { 1893 assert(_finger > finger, "the finger should have moved forward"); 1894 // read it again 1895 finger = _finger; 1896 } 1897 } 1898 1899 return NULL; 1900 } 1901 1902 #ifndef PRODUCT 1903 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 1904 private: 1905 G1CollectedHeap* _g1h; 1906 const char* _phase; 1907 int _info; 1908 1909 public: 1910 VerifyNoCSetOops(const char* phase, int info = -1) : 1911 _g1h(G1CollectedHeap::heap()), 1912 _phase(phase), 1913 _info(info) 1914 { } 1915 1916 void operator()(G1TaskQueueEntry task_entry) const { 1917 if (task_entry.is_array_slice()) { 1918 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1919 return; 1920 } 1921 guarantee(oopDesc::is_oop(task_entry.obj()), 1922 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1923 p2i(task_entry.obj()), _phase, _info); 1924 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1925 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1926 p2i(task_entry.obj()), _phase, _info); 1927 } 1928 }; 1929 1930 void G1ConcurrentMark::verify_no_cset_oops() { 1931 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1932 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 1933 return; 1934 } 1935 1936 // Verify entries on the global mark stack 1937 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 1938 1939 // Verify entries on the task queues 1940 for (uint i = 0; i < _max_worker_id; ++i) { 1941 G1CMTaskQueue* queue = _task_queues->queue(i); 1942 queue->iterate(VerifyNoCSetOops("Queue", i)); 1943 } 1944 1945 // Verify the global finger 1946 HeapWord* global_finger = finger(); 1947 if (global_finger != NULL && global_finger < _heap_end) { 1948 // Since we always iterate over all regions, we might get a NULL HeapRegion 1949 // here. 1950 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 1951 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 1952 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 1953 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 1954 } 1955 1956 // Verify the task fingers 1957 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 1958 for (uint i = 0; i < parallel_marking_threads(); ++i) { 1959 G1CMTask* task = _tasks[i]; 1960 HeapWord* task_finger = task->finger(); 1961 if (task_finger != NULL && task_finger < _heap_end) { 1962 // See above note on the global finger verification. 
1963 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 1964 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 1965 !task_hr->in_collection_set(), 1966 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 1967 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 1968 } 1969 } 1970 } 1971 #endif // PRODUCT 1972 void G1ConcurrentMark::create_live_data() { 1973 _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap); 1974 } 1975 1976 void G1ConcurrentMark::finalize_live_data() { 1977 _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap); 1978 } 1979 1980 void G1ConcurrentMark::verify_live_data() { 1981 _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap); 1982 } 1983 1984 void G1ConcurrentMark::clear_live_data(WorkGang* workers) { 1985 _g1h->g1_rem_set()->clear_card_live_data(workers); 1986 } 1987 1988 #ifdef ASSERT 1989 void G1ConcurrentMark::verify_live_data_clear() { 1990 _g1h->g1_rem_set()->verify_card_live_data_is_clear(); 1991 } 1992 #endif 1993 1994 void G1ConcurrentMark::print_stats() { 1995 if (!log_is_enabled(Debug, gc, stats)) { 1996 return; 1997 } 1998 log_debug(gc, stats)("---------------------------------------------------------------------"); 1999 for (size_t i = 0; i < _active_tasks; ++i) { 2000 _tasks[i]->print_stats(); 2001 log_debug(gc, stats)("---------------------------------------------------------------------"); 2002 } 2003 } 2004 2005 void G1ConcurrentMark::abort() { 2006 if (!cmThread()->during_cycle() || _has_aborted) { 2007 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2008 return; 2009 } 2010 2011 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2012 // concurrent bitmap clearing. 2013 { 2014 GCTraceTime(Debug, gc)("Clear Next Bitmap"); 2015 clear_bitmap(_nextMarkBitMap, _g1h->workers(), false); 2016 } 2017 // Note we cannot clear the previous marking bitmap here 2018 // since VerifyDuringGC verifies the objects marked during 2019 // a full GC against the previous bitmap. 2020 2021 { 2022 GCTraceTime(Debug, gc)("Clear Live Data"); 2023 clear_live_data(_g1h->workers()); 2024 } 2025 DEBUG_ONLY({ 2026 GCTraceTime(Debug, gc)("Verify Live Data Clear"); 2027 verify_live_data_clear(); 2028 }) 2029 // Empty mark stack 2030 reset_marking_state(); 2031 for (uint i = 0; i < _max_worker_id; ++i) { 2032 _tasks[i]->clear_region_fields(); 2033 } 2034 _first_overflow_barrier_sync.abort(); 2035 _second_overflow_barrier_sync.abort(); 2036 _has_aborted = true; 2037 2038 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2039 satb_mq_set.abandon_partial_marking(); 2040 // This can be called either during or outside marking, we'll read 2041 // the expected_active value from the SATB queue set. 2042 satb_mq_set.set_active_all_threads( 2043 false, /* new active value */ 2044 satb_mq_set.is_active() /* expected_active */); 2045 } 2046 2047 static void print_ms_time_info(const char* prefix, const char* name, 2048 NumberSeq& ns) { 2049 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2050 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2051 if (ns.num() > 0) { 2052 log_trace(gc, marking)("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 2053 prefix, ns.sd(), ns.maximum()); 2054 } 2055 } 2056 2057 void G1ConcurrentMark::print_summary_info() { 2058 Log(gc, marking) log; 2059 if (!log.is_trace()) { 2060 return; 2061 } 2062 2063 log.trace(" Concurrent marking:"); 2064 print_ms_time_info(" ", "init marks", _init_times); 2065 print_ms_time_info(" ", "remarks", _remark_times); 2066 { 2067 print_ms_time_info(" ", "final marks", _remark_mark_times); 2068 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2069 2070 } 2071 print_ms_time_info(" ", "cleanups", _cleanup_times); 2072 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2073 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2074 if (G1ScrubRemSets) { 2075 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2076 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2077 } 2078 log.trace(" Total stop_world time = %8.2f s.", 2079 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2080 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2081 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2082 } 2083 2084 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2085 _parallel_workers->print_worker_threads_on(st); 2086 } 2087 2088 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2089 _parallel_workers->threads_do(tc); 2090 } 2091 2092 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2093 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2094 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2095 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2096 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2097 } 2098 2099 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2100 ReferenceProcessor* result = g1h->ref_processor_cm(); 2101 assert(result != NULL, "CM reference processor should not be NULL"); 2102 return result; 2103 } 2104 2105 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2106 G1ConcurrentMark* cm, 2107 G1CMTask* task) 2108 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2109 _g1h(g1h), _cm(cm), _task(task) 2110 { } 2111 2112 void G1CMTask::setup_for_region(HeapRegion* hr) { 2113 assert(hr != NULL, 2114 "claim_region() should have filtered out NULL regions"); 2115 _curr_region = hr; 2116 _finger = hr->bottom(); 2117 update_region_limit(); 2118 } 2119 2120 void G1CMTask::update_region_limit() { 2121 HeapRegion* hr = _curr_region; 2122 HeapWord* bottom = hr->bottom(); 2123 HeapWord* limit = hr->next_top_at_mark_start(); 2124 2125 if (limit == bottom) { 2126 // The region was collected underneath our feet. 2127 // We set the finger to bottom to ensure that the bitmap 2128 // iteration that will follow this will not do anything. 2129 // (this is not a condition that holds when we set the region up, 2130 // as the region is not supposed to be empty in the first place) 2131 _finger = bottom; 2132 } else if (limit >= _region_limit) { 2133 assert(limit >= _finger, "peace of mind"); 2134 } else { 2135 assert(limit < _region_limit, "only way to get here"); 2136 // This can happen under some pretty unusual circumstances. An 2137 // evacuation pause empties the region underneath our feet (NTAMS 2138 // at bottom). 
We then do some allocation in the region (NTAMS 2139 // stays at bottom), followed by the region being used as a GC 2140 // alloc region (NTAMS will move to top() and the objects 2141 // originally below it will be grayed). All objects now marked in 2142 // the region are explicitly grayed, if below the global finger, 2143 // and we do not need in fact to scan anything else. So, we simply 2144 // set _finger to be limit to ensure that the bitmap iteration 2145 // doesn't do anything. 2146 _finger = limit; 2147 } 2148 2149 _region_limit = limit; 2150 } 2151 2152 void G1CMTask::giveup_current_region() { 2153 assert(_curr_region != NULL, "invariant"); 2154 clear_region_fields(); 2155 } 2156 2157 void G1CMTask::clear_region_fields() { 2158 // Values for these three fields that indicate that we're not 2159 // holding on to a region. 2160 _curr_region = NULL; 2161 _finger = NULL; 2162 _region_limit = NULL; 2163 } 2164 2165 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 2166 if (cm_oop_closure == NULL) { 2167 assert(_cm_oop_closure != NULL, "invariant"); 2168 } else { 2169 assert(_cm_oop_closure == NULL, "invariant"); 2170 } 2171 _cm_oop_closure = cm_oop_closure; 2172 } 2173 2174 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) { 2175 guarantee(nextMarkBitMap != NULL, "invariant"); 2176 _nextMarkBitMap = nextMarkBitMap; 2177 clear_region_fields(); 2178 2179 _calls = 0; 2180 _elapsed_time_ms = 0.0; 2181 _termination_time_ms = 0.0; 2182 _termination_start_time_ms = 0.0; 2183 } 2184 2185 bool G1CMTask::should_exit_termination() { 2186 regular_clock_call(); 2187 // This is called when we are in the termination protocol. We should 2188 // quit if, for some reason, this task wants to abort or the global 2189 // stack is not empty (this means that we can get work from it). 2190 return !_cm->mark_stack_empty() || has_aborted(); 2191 } 2192 2193 void G1CMTask::reached_limit() { 2194 assert(_words_scanned >= _words_scanned_limit || 2195 _refs_reached >= _refs_reached_limit , 2196 "shouldn't have been called otherwise"); 2197 regular_clock_call(); 2198 } 2199 2200 void G1CMTask::regular_clock_call() { 2201 if (has_aborted()) return; 2202 2203 // First, we need to recalculate the words scanned and refs reached 2204 // limits for the next clock call. 2205 recalculate_limits(); 2206 2207 // During the regular clock call we do the following 2208 2209 // (1) If an overflow has been flagged, then we abort. 2210 if (_cm->has_overflown()) { 2211 set_has_aborted(); 2212 return; 2213 } 2214 2215 // If we are not concurrent (i.e. we're doing remark) we don't need 2216 // to check anything else. The other steps are only needed during 2217 // the concurrent marking phase. 2218 if (!concurrent()) return; 2219 2220 // (2) If marking has been aborted for Full GC, then we also abort. 2221 if (_cm->has_aborted()) { 2222 set_has_aborted(); 2223 return; 2224 } 2225 2226 double curr_time_ms = os::elapsedVTime() * 1000.0; 2227 2228 // (4) We check whether we should yield. If we have to, then we abort. 2229 if (SuspendibleThreadSet::should_yield()) { 2230 // We should yield. To do this we abort the task. The caller is 2231 // responsible for yielding. 2232 set_has_aborted(); 2233 return; 2234 } 2235 2236 // (5) We check whether we've reached our time quota. If we have, 2237 // then we abort. 
2238 double elapsed_time_ms = curr_time_ms - _start_time_ms;
2239 if (elapsed_time_ms > _time_target_ms) {
2240 set_has_aborted();
2241 _has_timed_out = true;
2242 return;
2243 }
2244
2245 // (6) Finally, we check whether there are enough completed SATB
2246 // buffers available for processing. If there are, we abort.
2247 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2248 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2249 // we do need to process SATB buffers, we'll abort and restart
2250 // the marking task to do so
2251 set_has_aborted();
2252 return;
2253 }
2254 }
2255
2256 void G1CMTask::recalculate_limits() {
2257 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2258 _words_scanned_limit = _real_words_scanned_limit;
2259
2260 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2261 _refs_reached_limit = _real_refs_reached_limit;
2262 }
2263
2264 void G1CMTask::decrease_limits() {
2265 // This is called when we believe that we're going to do an infrequent
2266 // operation which will increase the per byte scanned cost (i.e. move
2267 // entries to/from the global stack). It basically tries to decrease the
2268 // scanning limit so that the clock is called earlier.
2269
2270 _words_scanned_limit = _real_words_scanned_limit -
2271 3 * words_scanned_period / 4;
2272 _refs_reached_limit = _real_refs_reached_limit -
2273 3 * refs_reached_period / 4;
2274 }
2275
2276 void G1CMTask::move_entries_to_global_stack() {
2277 // Local array where we'll store the entries that will be popped
2278 // from the local queue.
2279 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2280
2281 size_t n = 0;
2282 G1TaskQueueEntry task_entry;
2283 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2284 buffer[n] = task_entry;
2285 ++n;
2286 }
2287 if (n < G1CMMarkStack::EntriesPerChunk) {
2288 buffer[n] = G1TaskQueueEntry();
2289 }
2290
2291 if (n > 0) {
2292 if (!_cm->mark_stack_push(buffer)) {
2293 set_has_aborted();
2294 }
2295 }
2296
2297 // This operation was quite expensive, so decrease the limits.
2298 decrease_limits();
2299 }
2300
2301 bool G1CMTask::get_entries_from_global_stack() {
2302 // Local array where we'll store the entries that will be popped
2303 // from the global stack.
2304 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2305
2306 if (!_cm->mark_stack_pop(buffer)) {
2307 return false;
2308 }
2309
2310 // We did actually pop at least one entry.
2311 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2312 G1TaskQueueEntry task_entry = buffer[i];
2313 if (task_entry.is_null()) {
2314 break;
2315 }
2316 assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2317 bool success = _task_queue->push(task_entry);
2318 // We only call this when the local queue is empty or under a
2319 // given target limit. So, we do not expect this push to fail.
2320 assert(success, "invariant");
2321 }
2322
2323 // This operation was quite expensive, so decrease the limits
2324 decrease_limits();
2325 return true;
2326 }
2327
2328 void G1CMTask::drain_local_queue(bool partially) {
2329 if (has_aborted()) {
2330 return;
2331 }
2332
2333 // Decide what the target size is, depending on whether we're going to
2334 // drain it partially (so that other tasks can steal if they run out
2335 // of things to do) or totally (at the very end).
2336 size_t target_size;
2337 if (partially) {
2338 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2339 } else {
2340 target_size = 0;
2341 }
2342
2343 if (_task_queue->size() > target_size) {
2344 G1TaskQueueEntry entry;
2345 bool ret = _task_queue->pop_local(entry);
2346 while (ret) {
2347 scan_task_entry(entry);
2348 if (_task_queue->size() <= target_size || has_aborted()) {
2349 ret = false;
2350 } else {
2351 ret = _task_queue->pop_local(entry);
2352 }
2353 }
2354 }
2355 }
2356
2357 void G1CMTask::drain_global_stack(bool partially) {
2358 if (has_aborted()) return;
2359
2360 // We have a policy to drain the local queue before we attempt to
2361 // drain the global stack.
2362 assert(partially || _task_queue->size() == 0, "invariant");
2363
2364 // Decide what the target size is, depending on whether we're going to
2365 // drain it partially (so that other tasks can steal if they run out
2366 // of things to do) or totally (at the very end).
2367 // Notice that when draining the global mark stack partially, due to the raciness
2368 // of the mark stack size update we might in fact drop below the target. But,
2369 // this is not a problem.
2370 // In case of total draining, we simply process until the global mark stack is
2371 // totally empty, disregarding the size counter.
2372 if (partially) {
2373 size_t const target_size = _cm->partial_mark_stack_size_target();
2374 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2375 if (get_entries_from_global_stack()) {
2376 drain_local_queue(partially);
2377 }
2378 }
2379 } else {
2380 while (!has_aborted() && get_entries_from_global_stack()) {
2381 drain_local_queue(partially);
2382 }
2383 }
2384 }
2385
2386 // The SATB queue has several assumptions about whether to call the par or
2387 // non-par versions of the methods. This is why some of the code is
2388 // replicated. We should really get rid of the single-threaded version
2389 // of the code to simplify things.
2390 void G1CMTask::drain_satb_buffers() {
2391 if (has_aborted()) return;
2392
2393 // We set this so that the regular clock knows that we're in the
2394 // middle of draining buffers and doesn't set the abort flag when it
2395 // notices that SATB buffers are available for draining. It'd be
2396 // very counterproductive if it did that. :-)
2397 _draining_satb_buffers = true;
2398
2399 G1CMSATBBufferClosure satb_cl(this, _g1h);
2400 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2401
2402 // This keeps claiming and applying the closure to completed buffers
2403 // until we run out of buffers or we need to abort.
2404 while (!has_aborted() && 2405 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2406 regular_clock_call(); 2407 } 2408 2409 _draining_satb_buffers = false; 2410 2411 assert(has_aborted() || 2412 concurrent() || 2413 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2414 2415 // again, this was a potentially expensive operation, decrease the 2416 // limits to get the regular clock call early 2417 decrease_limits(); 2418 } 2419 2420 void G1CMTask::print_stats() { 2421 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 2422 _worker_id, _calls); 2423 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2424 _elapsed_time_ms, _termination_time_ms); 2425 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 2426 _step_times_ms.num(), _step_times_ms.avg(), 2427 _step_times_ms.sd()); 2428 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 2429 _step_times_ms.maximum(), _step_times_ms.sum()); 2430 } 2431 2432 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2433 return _task_queues->steal(worker_id, hash_seed, task_entry); 2434 } 2435 2436 /***************************************************************************** 2437 2438 The do_marking_step(time_target_ms, ...) method is the building 2439 block of the parallel marking framework. It can be called in parallel 2440 with other invocations of do_marking_step() on different tasks 2441 (but only one per task, obviously) and concurrently with the 2442 mutator threads, or during remark, hence it eliminates the need 2443 for two versions of the code. When called during remark, it will 2444 pick up from where the task left off during the concurrent marking 2445 phase. Interestingly, tasks are also claimable during evacuation 2446 pauses too, since do_marking_step() ensures that it aborts before 2447 it needs to yield. 2448 2449 The data structures that it uses to do marking work are the 2450 following: 2451 2452 (1) Marking Bitmap. If there are gray objects that appear only 2453 on the bitmap (this happens either when dealing with an overflow 2454 or when the initial marking phase has simply marked the roots 2455 and didn't push them on the stack), then tasks claim heap 2456 regions whose bitmap they then scan to find gray objects. A 2457 global finger indicates where the end of the last claimed region 2458 is. A local finger indicates how far into the region a task has 2459 scanned. The two fingers are used to determine how to gray an 2460 object (i.e. whether simply marking it is OK, as it will be 2461 visited by a task in the future, or whether it needs to be also 2462 pushed on a stack). 2463 2464 (2) Local Queue. The local queue of the task which is accessed 2465 reasonably efficiently by the task. Other tasks can steal from 2466 it when they run out of work. Throughout the marking phase, a 2467 task attempts to keep its local queue short but not totally 2468 empty, so that entries are available for stealing by other 2469 tasks. Only when there is no more work, a task will totally 2470 drain its local queue. 2471 2472 (3) Global Mark Stack. This handles local queue overflow. During 2473 marking only sets of entries are moved between it and the local 2474 queues, as access to it requires a mutex and more fine-grain 2475 interaction with it which might cause contention. If it 2476 overflows, then the marking phase should restart and iterate 2477 over the bitmap to identify gray objects. 
Throughout the marking
2477
2478 phase, tasks attempt to keep the global mark stack at a small
2479 length but not totally empty, so that entries are available for
2480 popping by other tasks. Only when there is no more work, tasks
2481 will totally drain the global mark stack.
2482
2483 (4) SATB Buffer Queue. This is where completed SATB buffers are
2484 made available. Buffers are regularly removed from this queue
2485 and scanned for roots, so that the queue doesn't get too
2486 long. During remark, all completed buffers are processed, as
2487 well as the filled-in parts of any uncompleted buffers.
2488
2489 The do_marking_step() method tries to abort when the time target
2490 has been reached. There are a few other cases when the
2491 do_marking_step() method also aborts:
2492
2493 (1) When the marking phase has been aborted (after a Full GC).
2494
2495 (2) When a global overflow (on the global stack) has been
2496 triggered. Before the task aborts, it will actually sync up with
2497 the other tasks to ensure that all the marking data structures
2498 (local queues, stacks, fingers etc.) are re-initialized so that
2499 when do_marking_step() completes, the marking phase can
2500 immediately restart.
2501
2502 (3) When enough completed SATB buffers are available. The
2503 do_marking_step() method only tries to drain SATB buffers right
2504 at the beginning. So, if enough buffers are available, the
2505 marking step aborts and the SATB buffers are processed at
2506 the beginning of the next invocation.
2507
2508 (4) To yield. When we have to yield, we abort and yield
2509 right at the end of do_marking_step(). This saves us from a lot
2510 of hassle as, by yielding, we might allow a Full GC. If this
2511 happens, then objects will be compacted underneath our feet, the
2512 heap might shrink, etc. We save checking for this by just
2513 aborting and doing the yield right at the end.
2514
2515 From the above it follows that the do_marking_step() method should
2516 be called in a loop (or, otherwise, regularly) until it completes.
2517
2518 If a marking step completes without its has_aborted() flag being
2519 true, it means it has completed the current marking phase (and
2520 also all other marking tasks have done so and have all synced up).
2521
2522 A method called regular_clock_call() is invoked "regularly" (in
2523 sub-ms intervals) throughout marking. It is this clock method that
2524 checks all the abort conditions which were mentioned above and
2525 decides when the task should abort. A work-based scheme is used to
2526 trigger this clock method: when the number of object words the
2527 marking phase has scanned or the number of references the marking
2528 phase has visited reach a given limit. Additional invocations of
2529 the clock method have been planted in a few other strategic places
2530 too. The initial reason for the clock method was to avoid calling
2531 vtime too regularly, as it is quite expensive. So, once it was in
2532 place, it was natural to piggy-back all the other conditions on it
2533 too and not constantly check them throughout the code.
2534
2535 If do_termination is true then do_marking_step will enter its
2536 termination protocol.
2537
2538 The value of is_serial must be true when do_marking_step is being
2539 called serially (i.e. by the VMThread) and do_marking_step should
2540 skip any synchronization in the termination and overflow code.
2541 Examples include the serial remark code and the serial reference
2542 processing closures.
2543
2544 The value of is_serial must be false when do_marking_step is
2545 being called by any of the worker threads in a work gang.
2546 Examples include the concurrent marking code (CMMarkingTask),
2547 the MT remark code, and the MT reference processing closures.
2548
2549 *****************************************************************************/
2550
2551 void G1CMTask::do_marking_step(double time_target_ms,
2552 bool do_termination,
2553 bool is_serial) {
2554 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2555 assert(concurrent() == _cm->concurrent(), "they should be the same");
2556
2557 G1Policy* g1_policy = _g1h->g1_policy();
2558 assert(_task_queues != NULL, "invariant");
2559 assert(_task_queue != NULL, "invariant");
2560 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
2561
2562 assert(!_claimed,
2563 "only one thread should claim this task at any one time");
2564
2565 // OK, this doesn't safeguard against all possible scenarios, as it is
2566 // possible for two threads to set the _claimed flag at the same
2567 // time. But it is only for debugging purposes anyway and it will
2568 // catch most problems.
2569 _claimed = true;
2570
2571 _start_time_ms = os::elapsedVTime() * 1000.0;
2572
2573 // If do_stealing is true then do_marking_step will attempt to
2574 // steal work from the other G1CMTasks. It only makes sense to
2575 // enable stealing when the termination protocol is enabled
2576 // and do_marking_step() is not being called serially.
2577 bool do_stealing = do_termination && !is_serial;
2578
2579 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2580 _time_target_ms = time_target_ms - diff_prediction_ms;
2581
2582 // set up the variables that are used in the work-based scheme to
2583 // call the regular clock method
2584 _words_scanned = 0;
2585 _refs_reached = 0;
2586 recalculate_limits();
2587
2588 // clear all flags
2589 clear_has_aborted();
2590 _has_timed_out = false;
2591 _draining_satb_buffers = false;
2592
2593 ++_calls;
2594
2595 // Set up the bitmap and oop closures. Anything that uses them is
2596 // eventually called from this method, so it is OK to allocate these
2597 // statically.
2598 G1CMBitMapClosure bitmap_closure(this, _cm);
2599 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
2600 set_cm_oop_closure(&cm_oop_closure);
2601
2602 if (_cm->has_overflown()) {
2603 // This can happen if the mark stack overflows during a GC pause
2604 // and this task, after a yield point, restarts. We have to abort
2605 // as we need to get into the overflow protocol which happens
2606 // right at the end of this task.
2607 set_has_aborted();
2608 }
2609
2610 // First drain any available SATB buffers. After this, we will not
2611 // look at SATB buffers before the next invocation of this method.
2612 // If enough completed SATB buffers are queued up, the regular clock
2613 // will abort this task so that it restarts.
2614 drain_satb_buffers();
2615 // ...then partially drain the local queue and the global stack
2616 drain_local_queue(true);
2617 drain_global_stack(true);
2618
2619 do {
2620 if (!has_aborted() && _curr_region != NULL) {
2621 // This means that we're already holding on to a region.
2622 assert(_finger != NULL, "if region is not NULL, then the finger "
2623 "should not be NULL either");
2624
2625 // We might have restarted this task after an evacuation pause
2626 // which might have evacuated the region we're holding on to
2627 // underneath our feet.
Let's read its limit again to make sure 2628 // that we do not iterate over a region of the heap that 2629 // contains garbage (update_region_limit() will also move 2630 // _finger to the start of the region if it is found empty). 2631 update_region_limit(); 2632 // We will start from _finger not from the start of the region, 2633 // as we might be restarting this task after aborting half-way 2634 // through scanning this region. In this case, _finger points to 2635 // the address where we last found a marked object. If this is a 2636 // fresh region, _finger points to start(). 2637 MemRegion mr = MemRegion(_finger, _region_limit); 2638 2639 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2640 "humongous regions should go around loop once only"); 2641 2642 // Some special cases: 2643 // If the memory region is empty, we can just give up the region. 2644 // If the current region is humongous then we only need to check 2645 // the bitmap for the bit associated with the start of the object, 2646 // scan the object if it's live, and give up the region. 2647 // Otherwise, let's iterate over the bitmap of the part of the region 2648 // that is left. 2649 // If the iteration is successful, give up the region. 2650 if (mr.is_empty()) { 2651 giveup_current_region(); 2652 regular_clock_call(); 2653 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2654 if (_nextMarkBitMap->is_marked(mr.start())) { 2655 // The object is marked - apply the closure 2656 bitmap_closure.do_addr(mr.start()); 2657 } 2658 // Even if this task aborted while scanning the humongous object 2659 // we can (and should) give up the current region. 2660 giveup_current_region(); 2661 regular_clock_call(); 2662 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 2663 giveup_current_region(); 2664 regular_clock_call(); 2665 } else { 2666 assert(has_aborted(), "currently the only way to do so"); 2667 // The only way to abort the bitmap iteration is to return 2668 // false from the do_bit() method. However, inside the 2669 // do_bit() method we move the _finger to point to the 2670 // object currently being looked at. So, if we bail out, we 2671 // have definitely set _finger to something non-null. 2672 assert(_finger != NULL, "invariant"); 2673 2674 // Region iteration was actually aborted. So now _finger 2675 // points to the address of the object we last scanned. If we 2676 // leave it there, when we restart this task, we will rescan 2677 // the object. It is easy to avoid this. We move the finger by 2678 // enough to point to the next possible object header. 2679 assert(_finger < _region_limit, "invariant"); 2680 HeapWord* const new_finger = _finger + ((oop)_finger)->size(); 2681 // Check if bitmap iteration was aborted while scanning the last object 2682 if (new_finger >= _region_limit) { 2683 giveup_current_region(); 2684 } else { 2685 move_finger_to(new_finger); 2686 } 2687 } 2688 } 2689 // At this point we have either completed iterating over the 2690 // region we were holding on to, or we have aborted. 2691 2692 // We then partially drain the local queue and the global stack. 2693 // (Do we really need this?) 2694 drain_local_queue(true); 2695 drain_global_stack(true); 2696 2697 // Read the note on the claim_region() method on why it might 2698 // return NULL with potentially more regions available for 2699 // claiming and why we have to check out_of_regions() to determine 2700 // whether we're done or not. 
2701 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2702 // We are going to try to claim a new region. We should have 2703 // given up on the previous one. 2704 // Separated the asserts so that we know which one fires. 2705 assert(_curr_region == NULL, "invariant"); 2706 assert(_finger == NULL, "invariant"); 2707 assert(_region_limit == NULL, "invariant"); 2708 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2709 if (claimed_region != NULL) { 2710 // Yes, we managed to claim one 2711 setup_for_region(claimed_region); 2712 assert(_curr_region == claimed_region, "invariant"); 2713 } 2714 // It is important to call the regular clock here. It might take 2715 // a while to claim a region if, for example, we hit a large 2716 // block of empty regions. So we need to call the regular clock 2717 // method once round the loop to make sure it's called 2718 // frequently enough. 2719 regular_clock_call(); 2720 } 2721 2722 if (!has_aborted() && _curr_region == NULL) { 2723 assert(_cm->out_of_regions(), 2724 "at this point we should be out of regions"); 2725 } 2726 } while ( _curr_region != NULL && !has_aborted()); 2727 2728 if (!has_aborted()) { 2729 // We cannot check whether the global stack is empty, since other 2730 // tasks might be pushing objects to it concurrently. 2731 assert(_cm->out_of_regions(), 2732 "at this point we should be out of regions"); 2733 // Try to reduce the number of available SATB buffers so that 2734 // remark has less work to do. 2735 drain_satb_buffers(); 2736 } 2737 2738 // Since we've done everything else, we can now totally drain the 2739 // local queue and global stack. 2740 drain_local_queue(false); 2741 drain_global_stack(false); 2742 2743 // Attempt at work stealing from other task's queues. 2744 if (do_stealing && !has_aborted()) { 2745 // We have not aborted. This means that we have finished all that 2746 // we could. Let's try to do some stealing... 2747 2748 // We cannot check whether the global stack is empty, since other 2749 // tasks might be pushing objects to it concurrently. 2750 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2751 "only way to reach here"); 2752 while (!has_aborted()) { 2753 G1TaskQueueEntry entry; 2754 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2755 scan_task_entry(entry); 2756 2757 // And since we're towards the end, let's totally drain the 2758 // local queue and global stack. 2759 drain_local_queue(false); 2760 drain_global_stack(false); 2761 } else { 2762 break; 2763 } 2764 } 2765 } 2766 2767 // We still haven't aborted. Now, let's try to get into the 2768 // termination protocol. 2769 if (do_termination && !has_aborted()) { 2770 // We cannot check whether the global stack is empty, since other 2771 // tasks might be concurrently pushing objects on it. 2772 // Separated the asserts so that we know which one fires. 2773 assert(_cm->out_of_regions(), "only way to reach here"); 2774 assert(_task_queue->size() == 0, "only way to reach here"); 2775 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2776 2777 // The G1CMTask class also extends the TerminatorTerminator class, 2778 // hence its should_exit_termination() method will also decide 2779 // whether to exit the termination protocol or not. 
2780 bool finished = (is_serial || 2781 _cm->terminator()->offer_termination(this)); 2782 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2783 _termination_time_ms += 2784 termination_end_time_ms - _termination_start_time_ms; 2785 2786 if (finished) { 2787 // We're all done. 2788 2789 if (_worker_id == 0) { 2790 // let's allow task 0 to do this 2791 if (concurrent()) { 2792 assert(_cm->concurrent_marking_in_progress(), "invariant"); 2793 // we need to set this to false before the next 2794 // safepoint. This way we ensure that the marking phase 2795 // doesn't observe any more heap expansions. 2796 _cm->clear_concurrent_marking_in_progress(); 2797 } 2798 } 2799 2800 // We can now guarantee that the global stack is empty, since 2801 // all other tasks have finished. We separated the guarantees so 2802 // that, if a condition is false, we can immediately find out 2803 // which one. 2804 guarantee(_cm->out_of_regions(), "only way to reach here"); 2805 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2806 guarantee(_task_queue->size() == 0, "only way to reach here"); 2807 guarantee(!_cm->has_overflown(), "only way to reach here"); 2808 } else { 2809 // Apparently there's more work to do. Let's abort this task. It 2810 // will restart it and we can hopefully find more things to do. 2811 set_has_aborted(); 2812 } 2813 } 2814 2815 // Mainly for debugging purposes to make sure that a pointer to the 2816 // closure which was statically allocated in this frame doesn't 2817 // escape it by accident. 2818 set_cm_oop_closure(NULL); 2819 double end_time_ms = os::elapsedVTime() * 1000.0; 2820 double elapsed_time_ms = end_time_ms - _start_time_ms; 2821 // Update the step history. 2822 _step_times_ms.add(elapsed_time_ms); 2823 2824 if (has_aborted()) { 2825 // The task was aborted for some reason. 2826 if (_has_timed_out) { 2827 double diff_ms = elapsed_time_ms - _time_target_ms; 2828 // Keep statistics of how well we did with respect to hitting 2829 // our target only if we actually timed out (if we aborted for 2830 // other reasons, then the results might get skewed). 2831 _marking_step_diffs_ms.add(diff_ms); 2832 } 2833 2834 if (_cm->has_overflown()) { 2835 // This is the interesting one. We aborted because a global 2836 // overflow was raised. This means we have to restart the 2837 // marking phase and start iterating over regions. However, in 2838 // order to do this we have to make sure that all tasks stop 2839 // what they are doing and re-initialize in a safe manner. We 2840 // will achieve this with the use of two barrier sync points. 2841 2842 if (!is_serial) { 2843 // We only need to enter the sync barrier if being called 2844 // from a parallel context 2845 _cm->enter_first_sync_barrier(_worker_id); 2846 2847 // When we exit this sync barrier we know that all tasks have 2848 // stopped doing marking work. So, it's now safe to 2849 // re-initialize our data structures. At the end of this method, 2850 // task 0 will clear the global data structures. 2851 } 2852 2853 // We clear the local state of this task... 2854 clear_region_fields(); 2855 2856 if (!is_serial) { 2857 // ...and enter the second barrier. 2858 _cm->enter_second_sync_barrier(_worker_id); 2859 } 2860 // At this point, if we're during the concurrent phase of 2861 // marking, everything has been re-initialized and we're 2862 // ready to restart. 
2863 } 2864 } 2865 2866 _claimed = false; 2867 } 2868 2869 G1CMTask::G1CMTask(uint worker_id, 2870 G1ConcurrentMark* cm, 2871 G1CMTaskQueue* task_queue, 2872 G1CMTaskQueueSet* task_queues) 2873 : _g1h(G1CollectedHeap::heap()), 2874 _worker_id(worker_id), _cm(cm), 2875 _objArray_processor(this), 2876 _claimed(false), 2877 _nextMarkBitMap(NULL), _hash_seed(17), 2878 _task_queue(task_queue), 2879 _task_queues(task_queues), 2880 _cm_oop_closure(NULL) { 2881 guarantee(task_queue != NULL, "invariant"); 2882 guarantee(task_queues != NULL, "invariant"); 2883 2884 _marking_step_diffs_ms.add(0.5); 2885 } 2886 2887 // These are formatting macros that are used below to ensure 2888 // consistent formatting. The *_H_* versions are used to format the 2889 // header for a particular value and they should be kept consistent 2890 // with the corresponding macro. Also note that most of the macros add 2891 // the necessary white space (as a prefix) which makes them a bit 2892 // easier to compose. 2893 2894 // All the output lines are prefixed with this string to be able to 2895 // identify them easily in a large log file. 2896 #define G1PPRL_LINE_PREFIX "###" 2897 2898 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 2899 #ifdef _LP64 2900 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 2901 #else // _LP64 2902 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 2903 #endif // _LP64 2904 2905 // For per-region info 2906 #define G1PPRL_TYPE_FORMAT " %-4s" 2907 #define G1PPRL_TYPE_H_FORMAT " %4s" 2908 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 2909 #define G1PPRL_BYTE_H_FORMAT " %9s" 2910 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 2911 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 2912 2913 // For summary info 2914 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 2915 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 2916 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 2917 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 2918 2919 G1PrintRegionLivenessInfoClosure:: 2920 G1PrintRegionLivenessInfoClosure(const char* phase_name) 2921 : _total_used_bytes(0), _total_capacity_bytes(0), 2922 _total_prev_live_bytes(0), _total_next_live_bytes(0), 2923 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 2924 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2925 MemRegion g1_reserved = g1h->g1_reserved(); 2926 double now = os::elapsedTime(); 2927 2928 // Print the header of the output. 
2929 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 2930 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 2931 G1PPRL_SUM_ADDR_FORMAT("reserved") 2932 G1PPRL_SUM_BYTE_FORMAT("region-size"), 2933 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 2934 HeapRegion::GrainBytes); 2935 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2936 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2937 G1PPRL_TYPE_H_FORMAT 2938 G1PPRL_ADDR_BASE_H_FORMAT 2939 G1PPRL_BYTE_H_FORMAT 2940 G1PPRL_BYTE_H_FORMAT 2941 G1PPRL_BYTE_H_FORMAT 2942 G1PPRL_DOUBLE_H_FORMAT 2943 G1PPRL_BYTE_H_FORMAT 2944 G1PPRL_BYTE_H_FORMAT, 2945 "type", "address-range", 2946 "used", "prev-live", "next-live", "gc-eff", 2947 "remset", "code-roots"); 2948 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2949 G1PPRL_TYPE_H_FORMAT 2950 G1PPRL_ADDR_BASE_H_FORMAT 2951 G1PPRL_BYTE_H_FORMAT 2952 G1PPRL_BYTE_H_FORMAT 2953 G1PPRL_BYTE_H_FORMAT 2954 G1PPRL_DOUBLE_H_FORMAT 2955 G1PPRL_BYTE_H_FORMAT 2956 G1PPRL_BYTE_H_FORMAT, 2957 "", "", 2958 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 2959 "(bytes)", "(bytes)"); 2960 } 2961 2962 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 2963 const char* type = r->get_type_str(); 2964 HeapWord* bottom = r->bottom(); 2965 HeapWord* end = r->end(); 2966 size_t capacity_bytes = r->capacity(); 2967 size_t used_bytes = r->used(); 2968 size_t prev_live_bytes = r->live_bytes(); 2969 size_t next_live_bytes = r->next_live_bytes(); 2970 double gc_eff = r->gc_efficiency(); 2971 size_t remset_bytes = r->rem_set()->mem_size(); 2972 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 2973 2974 _total_used_bytes += used_bytes; 2975 _total_capacity_bytes += capacity_bytes; 2976 _total_prev_live_bytes += prev_live_bytes; 2977 _total_next_live_bytes += next_live_bytes; 2978 _total_remset_bytes += remset_bytes; 2979 _total_strong_code_roots_bytes += strong_code_roots_bytes; 2980 2981 // Print a line for this particular region. 2982 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2983 G1PPRL_TYPE_FORMAT 2984 G1PPRL_ADDR_BASE_FORMAT 2985 G1PPRL_BYTE_FORMAT 2986 G1PPRL_BYTE_FORMAT 2987 G1PPRL_BYTE_FORMAT 2988 G1PPRL_DOUBLE_FORMAT 2989 G1PPRL_BYTE_FORMAT 2990 G1PPRL_BYTE_FORMAT, 2991 type, p2i(bottom), p2i(end), 2992 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 2993 remset_bytes, strong_code_roots_bytes); 2994 2995 return false; 2996 } 2997 2998 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 2999 // add static memory usages to remembered set sizes 3000 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3001 // Print the footer of the output. 3002 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3003 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3004 " SUMMARY" 3005 G1PPRL_SUM_MB_FORMAT("capacity") 3006 G1PPRL_SUM_MB_PERC_FORMAT("used") 3007 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3008 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3009 G1PPRL_SUM_MB_FORMAT("remset") 3010 G1PPRL_SUM_MB_FORMAT("code-roots"), 3011 bytes_to_mb(_total_capacity_bytes), 3012 bytes_to_mb(_total_used_bytes), 3013 perc(_total_used_bytes, _total_capacity_bytes), 3014 bytes_to_mb(_total_prev_live_bytes), 3015 perc(_total_prev_live_bytes, _total_capacity_bytes), 3016 bytes_to_mb(_total_next_live_bytes), 3017 perc(_total_next_live_bytes, _total_capacity_bytes), 3018 bytes_to_mb(_total_remset_bytes), 3019 bytes_to_mb(_total_strong_code_roots_bytes)); 3020 }
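// Usage sketch: the per-region liveness table above is written with
// log_trace(gc, liveness), so it is only produced when trace logging is enabled
// for those tags, e.g. (assuming the standard -Xlog unified logging syntax):
//
//   java -XX:+UseG1GC -Xlog:gc+liveness=trace ...
//
// Each row is prefixed with G1PPRL_LINE_PREFIX ("###") so the table can easily
// be grepped out of a large log file.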