1 /* 2 * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/metadataOnStackMark.hpp" 27 #include "classfile/symbolTable.hpp" 28 #include "code/codeCache.hpp" 29 #include "gc/g1/concurrentMarkThread.inline.hpp" 30 #include "gc/g1/g1CollectedHeap.inline.hpp" 31 #include "gc/g1/g1CollectorState.hpp" 32 #include "gc/g1/g1ConcurrentMark.inline.hpp" 33 #include "gc/g1/g1OopClosures.inline.hpp" 34 #include "gc/g1/g1Policy.hpp" 35 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp" 36 #include "gc/g1/g1StringDedup.hpp" 37 #include "gc/g1/heapRegion.inline.hpp" 38 #include "gc/g1/heapRegionRemSet.hpp" 39 #include "gc/g1/heapRegionSet.inline.hpp" 40 #include "gc/shared/adaptiveSizePolicy.hpp" 41 #include "gc/shared/gcId.hpp" 42 #include "gc/shared/gcTimer.hpp" 43 #include "gc/shared/gcTrace.hpp" 44 #include "gc/shared/gcTraceTime.inline.hpp" 45 #include "gc/shared/genOopClosures.inline.hpp" 46 #include "gc/shared/referencePolicy.hpp" 47 #include "gc/shared/strongRootsScope.hpp" 48 #include "gc/shared/suspendibleThreadSet.hpp" 49 #include "gc/shared/taskqueue.inline.hpp" 50 #include "gc/shared/vmGCOperations.hpp" 51 #include "gc/shared/weakProcessor.hpp" 52 #include "include/jvm.h" 53 #include "logging/log.hpp" 54 #include "memory/allocation.hpp" 55 #include "memory/resourceArea.hpp" 56 #include "oops/access.inline.hpp" 57 #include "oops/oop.inline.hpp" 58 #include "runtime/atomic.hpp" 59 #include "runtime/handles.inline.hpp" 60 #include "runtime/java.hpp" 61 #include "runtime/prefetch.inline.hpp" 62 #include "services/memTracker.hpp" 63 #include "utilities/align.hpp" 64 #include "utilities/growableArray.hpp" 65 66 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) { 67 assert(addr < _cm->finger(), "invariant"); 68 assert(addr >= _task->finger(), "invariant"); 69 70 // We move that task's local finger along. 
71 _task->move_finger_to(addr); 72 73 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); 74 // we only partially drain the local queue and global stack 75 _task->drain_local_queue(true); 76 _task->drain_global_stack(true); 77 78 // if the has_aborted flag has been raised, we need to bail out of 79 // the iteration 80 return !_task->has_aborted(); 81 } 82 83 G1CMMarkStack::G1CMMarkStack() : 84 _max_chunk_capacity(0), 85 _base(NULL), 86 _chunk_capacity(0) { 87 set_empty(); 88 } 89 90 bool G1CMMarkStack::resize(size_t new_capacity) { 91 assert(is_empty(), "Only resize when stack is empty."); 92 assert(new_capacity <= _max_chunk_capacity, 93 "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity); 94 95 TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC); 96 97 if (new_base == NULL) { 98 log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk)); 99 return false; 100 } 101 // Release old mapping. 102 if (_base != NULL) { 103 MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity); 104 } 105 106 _base = new_base; 107 _chunk_capacity = new_capacity; 108 set_empty(); 109 110 return true; 111 } 112 113 size_t G1CMMarkStack::capacity_alignment() { 114 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry); 115 } 116 117 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) { 118 guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized."); 119 120 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry); 121 122 _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; 123 size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; 124 125 guarantee(initial_chunk_capacity <= _max_chunk_capacity, 126 "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT, 127 _max_chunk_capacity, 128 initial_chunk_capacity); 129 130 log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT, 131 initial_chunk_capacity, _max_chunk_capacity); 132 133 return resize(initial_chunk_capacity); 134 } 135 136 void G1CMMarkStack::expand() { 137 if (_chunk_capacity == _max_chunk_capacity) { 138 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity); 139 return; 140 } 141 size_t old_capacity = _chunk_capacity; 142 // Double capacity if possible 143 size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity); 144 145 if (resize(new_capacity)) { 146 log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 147 old_capacity, new_capacity); 148 } else { 149 log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 150 old_capacity, new_capacity); 151 } 152 } 153 154 G1CMMarkStack::~G1CMMarkStack() { 155 if (_base != NULL) { 156 MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity); 157 } 158 } 159 160 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) { 161 elem->next = *list; 162 *list = elem; 163 } 164 165 void 
G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) { 166 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 167 add_chunk_to_list(&_chunk_list, elem); 168 _chunks_in_chunk_list++; 169 } 170 171 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) { 172 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 173 add_chunk_to_list(&_free_list, elem); 174 } 175 176 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) { 177 TaskQueueEntryChunk* result = *list; 178 if (result != NULL) { 179 *list = (*list)->next; 180 } 181 return result; 182 } 183 184 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() { 185 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 186 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list); 187 if (result != NULL) { 188 _chunks_in_chunk_list--; 189 } 190 return result; 191 } 192 193 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() { 194 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 195 return remove_chunk_from_list(&_free_list); 196 } 197 198 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() { 199 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code. 200 // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding 201 // wraparound of _hwm. 202 if (_hwm >= _chunk_capacity) { 203 return NULL; 204 } 205 206 size_t cur_idx = Atomic::add(1u, &_hwm) - 1; 207 if (cur_idx >= _chunk_capacity) { 208 return NULL; 209 } 210 211 TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk; 212 result->next = NULL; 213 return result; 214 } 215 216 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) { 217 // Get a new chunk. 218 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list(); 219 220 if (new_chunk == NULL) { 221 // Did not get a chunk from the free list. Allocate from backing memory. 222 new_chunk = allocate_new_chunk(); 223 224 if (new_chunk == NULL) { 225 return false; 226 } 227 } 228 229 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry)); 230 231 add_chunk_to_chunk_list(new_chunk); 232 233 return true; 234 } 235 236 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) { 237 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list(); 238 239 if (cur == NULL) { 240 return false; 241 } 242 243 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry)); 244 245 add_chunk_to_free_list(cur); 246 return true; 247 } 248 249 void G1CMMarkStack::set_empty() { 250 _chunks_in_chunk_list = 0; 251 _hwm = 0; 252 _chunk_list = NULL; 253 _free_list = NULL; 254 } 255 256 G1CMRootRegions::G1CMRootRegions() : 257 _survivors(NULL), _cm(NULL), _scan_in_progress(false), 258 _should_abort(false), _claimed_survivor_index(0) { } 259 260 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) { 261 _survivors = survivors; 262 _cm = cm; 263 } 264 265 void G1CMRootRegions::prepare_for_scan() { 266 assert(!scan_in_progress(), "pre-condition"); 267 268 // Currently, only survivors can be root regions. 
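// Reset the claim index; during root region scanning each worker calls
// claim_next(), which hands out successive indices into the survivor region
// array via an atomic increment until all regions have been claimed.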
269 _claimed_survivor_index = 0; 270 _scan_in_progress = _survivors->regions()->is_nonempty(); 271 _should_abort = false; 272 } 273 274 HeapRegion* G1CMRootRegions::claim_next() { 275 if (_should_abort) { 276 // If someone has set the should_abort flag, we return NULL to 277 // force the caller to bail out of their loop. 278 return NULL; 279 } 280 281 // Currently, only survivors can be root regions. 282 const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions(); 283 284 int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1; 285 if (claimed_index < survivor_regions->length()) { 286 return survivor_regions->at(claimed_index); 287 } 288 return NULL; 289 } 290 291 uint G1CMRootRegions::num_root_regions() const { 292 return (uint)_survivors->regions()->length(); 293 } 294 295 void G1CMRootRegions::notify_scan_done() { 296 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 297 _scan_in_progress = false; 298 RootRegionScan_lock->notify_all(); 299 } 300 301 void G1CMRootRegions::cancel_scan() { 302 notify_scan_done(); 303 } 304 305 void G1CMRootRegions::scan_finished() { 306 assert(scan_in_progress(), "pre-condition"); 307 308 // Currently, only survivors can be root regions. 309 if (!_should_abort) { 310 assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index); 311 assert((uint)_claimed_survivor_index >= _survivors->length(), 312 "we should have claimed all survivors, claimed index = %u, length = %u", 313 (uint)_claimed_survivor_index, _survivors->length()); 314 } 315 316 notify_scan_done(); 317 } 318 319 bool G1CMRootRegions::wait_until_scan_finished() { 320 if (!scan_in_progress()) { 321 return false; 322 } 323 324 { 325 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 326 while (scan_in_progress()) { 327 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 328 } 329 } 330 return true; 331 } 332 333 // Returns the maximum number of workers to be used in a concurrent 334 // phase based on the number of GC workers being used in a STW 335 // phase. 
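// For example, num_gc_workers = 8 yields (8 + 2) / 4 = 2 concurrent workers,
// while num_gc_workers = 1 yields MAX2(0, 1) = 1.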
336 static uint scale_concurrent_worker_threads(uint num_gc_workers) { 337 return MAX2((num_gc_workers + 2) / 4, 1U); 338 } 339 340 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, 341 G1RegionToSpaceMapper* prev_bitmap_storage, 342 G1RegionToSpaceMapper* next_bitmap_storage) : 343 // _cm_thread set inside the constructor 344 _g1h(g1h), 345 _completed_initialization(false), 346 347 _mark_bitmap_1(), 348 _mark_bitmap_2(), 349 _prev_mark_bitmap(&_mark_bitmap_1), 350 _next_mark_bitmap(&_mark_bitmap_2), 351 352 _heap(_g1h->reserved_region()), 353 354 _root_regions(), 355 356 _global_mark_stack(), 357 358 // _finger set in set_non_marking_state 359 360 _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads), 361 _max_num_tasks(ParallelGCThreads), 362 // _num_active_tasks set in set_non_marking_state() 363 // _tasks set inside the constructor 364 365 _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)), 366 _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)), 367 368 _first_overflow_barrier_sync(), 369 _second_overflow_barrier_sync(), 370 371 _has_overflown(false), 372 _concurrent(false), 373 _has_aborted(false), 374 _restart_for_overflow(false), 375 _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()), 376 _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()), 377 378 // _verbose_level set below 379 380 _init_times(), 381 _remark_times(), 382 _remark_mark_times(), 383 _remark_weak_ref_times(), 384 _cleanup_times(), 385 _total_cleanup_time(0.0), 386 387 _accum_task_vtime(NULL), 388 389 _concurrent_workers(NULL), 390 _num_concurrent_workers(0), 391 _max_concurrent_workers(0), 392 393 _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)), 394 _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC)) 395 { 396 _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage); 397 _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage); 398 399 // Create & start ConcurrentMark thread. 400 _cm_thread = new ConcurrentMarkThread(this); 401 if (_cm_thread->osthread() == NULL) { 402 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread"); 403 } 404 405 assert(CGC_lock != NULL, "CGC_lock must be initialized"); 406 407 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 408 satb_qs.set_buffer_size(G1SATBBufferSize); 409 410 _root_regions.init(_g1h->survivor(), this); 411 412 if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) { 413 // Calculate the number of concurrent worker threads by scaling 414 // the number of parallel GC threads. 
415 uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads); 416 FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num); 417 } 418 419 assert(ConcGCThreads > 0, "ConcGCThreads have been set."); 420 if (ConcGCThreads > ParallelGCThreads) { 421 log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).", 422 ConcGCThreads, ParallelGCThreads); 423 return; 424 } 425 426 log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset); 427 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads); 428 429 _num_concurrent_workers = ConcGCThreads; 430 _max_concurrent_workers = _num_concurrent_workers; 431 432 _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true); 433 _concurrent_workers->initialize_workers(); 434 435 if (FLAG_IS_DEFAULT(MarkStackSize)) { 436 size_t mark_stack_size = 437 MIN2(MarkStackSizeMax, 438 MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE))); 439 // Verify that the calculated value for MarkStackSize is in range. 440 // It would be nice to use the private utility routine from Arguments. 441 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { 442 log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): " 443 "must be between 1 and " SIZE_FORMAT, 444 mark_stack_size, MarkStackSizeMax); 445 return; 446 } 447 FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size); 448 } else { 449 // Verify MarkStackSize is in range. 450 if (FLAG_IS_CMDLINE(MarkStackSize)) { 451 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { 452 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 453 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): " 454 "must be between 1 and " SIZE_FORMAT, 455 MarkStackSize, MarkStackSizeMax); 456 return; 457 } 458 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) { 459 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 460 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")" 461 " or for MarkStackSizeMax (" SIZE_FORMAT ")", 462 MarkStackSize, MarkStackSizeMax); 463 return; 464 } 465 } 466 } 467 } 468 469 if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) { 470 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack."); 471 } 472 473 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC); 474 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC); 475 476 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 477 _num_active_tasks = _max_num_tasks; 478 479 for (uint i = 0; i < _max_num_tasks; ++i) { 480 G1CMTaskQueue* task_queue = new G1CMTaskQueue(); 481 task_queue->initialize(); 482 _task_queues->register_queue(i, task_queue); 483 484 _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions()); 485 486 _accum_task_vtime[i] = 0.0; 487 } 488 489 reset_at_marking_complete(); 490 _completed_initialization = true; 491 } 492 493 void G1ConcurrentMark::reset() { 494 _has_aborted = false; 495 496 reset_marking_for_restart(); 497 498 // Reset all tasks, since different phases will use different number of active 499 // threads. So, it's easiest to have all of them ready. 
500 for (uint i = 0; i < _max_num_tasks; ++i) { 501 _tasks[i]->reset(_next_mark_bitmap); 502 } 503 504 uint max_regions = _g1h->max_regions(); 505 for (uint i = 0; i < max_regions; i++) { 506 _top_at_rebuild_starts[i] = NULL; 507 _region_mark_stats[i].clear(); 508 } 509 } 510 511 void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) { 512 for (uint j = 0; j < _max_num_tasks; ++j) { 513 _tasks[j]->clear_mark_stats_cache(region_idx); 514 } 515 _top_at_rebuild_starts[region_idx] = NULL; 516 _region_mark_stats[region_idx].clear(); 517 } 518 519 void G1ConcurrentMark::clear_statistics(HeapRegion* r) { 520 uint const region_idx = r->hrm_index(); 521 if (r->is_humongous()) { 522 assert(r->is_starts_humongous(), "Got humongous continues region here"); 523 uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size()); 524 for (uint j = region_idx; j < (region_idx + size_in_regions); j++) { 525 clear_statistics_in_region(j); 526 } 527 } else { 528 clear_statistics_in_region(region_idx); 529 } 530 } 531 532 static void clear_mark_if_set(G1CMBitMap* bitmap, HeapWord* addr) { 533 if (bitmap->is_marked(addr)) { 534 bitmap->clear(addr); 535 } 536 } 537 538 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) { 539 assert_at_safepoint_on_vm_thread(); 540 541 // Need to clear all mark bits of the humongous object. 542 clear_mark_if_set(_prev_mark_bitmap, r->bottom()); 543 clear_mark_if_set(_next_mark_bitmap, r->bottom()); 544 545 if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) { 546 return; 547 } 548 549 // Clear any statistics about the region gathered so far. 550 clear_statistics(r); 551 } 552 553 void G1ConcurrentMark::reset_marking_for_restart() { 554 _global_mark_stack.set_empty(); 555 556 // Expand the marking stack, if we have to and if we can. 557 if (has_overflown()) { 558 _global_mark_stack.expand(); 559 560 uint max_regions = _g1h->max_regions(); 561 for (uint i = 0; i < max_regions; i++) { 562 _region_mark_stats[i].clear_during_overflow(); 563 } 564 } 565 566 clear_has_overflown(); 567 _finger = _heap.start(); 568 569 for (uint i = 0; i < _max_num_tasks; ++i) { 570 G1CMTaskQueue* queue = _task_queues->queue(i); 571 queue->set_empty(); 572 } 573 } 574 575 void G1ConcurrentMark::set_concurrency(uint active_tasks) { 576 assert(active_tasks <= _max_num_tasks, "we should not have more"); 577 578 _num_active_tasks = active_tasks; 579 // Need to update the three data structures below according to the 580 // number of active threads for this phase. 581 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 582 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 583 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 584 } 585 586 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) { 587 set_concurrency(active_tasks); 588 589 _concurrent = concurrent; 590 591 if (!concurrent) { 592 // At this point we should be in a STW phase, and completed marking. 593 assert_at_safepoint_on_vm_thread(); 594 assert(out_of_regions(), 595 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT, 596 p2i(_finger), p2i(_heap.end())); 597 } 598 } 599 600 void G1ConcurrentMark::reset_at_marking_complete() { 601 // We set the global marking state to some default values when we're 602 // not doing marking. 
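// Reusing reset_marking_for_restart() here also empties the global mark stack
// and the per-task queues and moves the global finger back to the start of the heap.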
603 reset_marking_for_restart(); 604 _num_active_tasks = 0; 605 } 606 607 G1ConcurrentMark::~G1ConcurrentMark() { 608 FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts); 609 FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats); 610 // The G1ConcurrentMark instance is never freed. 611 ShouldNotReachHere(); 612 } 613 614 class G1ClearBitMapTask : public AbstractGangTask { 615 public: 616 static size_t chunk_size() { return M; } 617 618 private: 619 // Heap region closure used for clearing the given mark bitmap. 620 class G1ClearBitmapHRClosure : public HeapRegionClosure { 621 private: 622 G1CMBitMap* _bitmap; 623 G1ConcurrentMark* _cm; 624 public: 625 G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) { 626 } 627 628 virtual bool do_heap_region(HeapRegion* r) { 629 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize; 630 631 HeapWord* cur = r->bottom(); 632 HeapWord* const end = r->end(); 633 634 while (cur < end) { 635 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end)); 636 _bitmap->clear_range(mr); 637 638 cur += chunk_size_in_words; 639 640 // Abort iteration if after yielding the marking has been aborted. 641 if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) { 642 return true; 643 } 644 // Repeat the asserts from before the start of the closure. We will do them 645 // as asserts here to minimize their overhead on the product. However, we 646 // will have them as guarantees at the beginning / end of the bitmap 647 // clearing to get some checking in the product. 648 assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant"); 649 assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant"); 650 } 651 assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index()); 652 653 return false; 654 } 655 }; 656 657 G1ClearBitmapHRClosure _cl; 658 HeapRegionClaimer _hr_claimer; 659 bool _suspendible; // If the task is suspendible, workers must join the STS. 660 661 public: 662 G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) : 663 AbstractGangTask("G1 Clear Bitmap"), 664 _cl(bitmap, suspendible ? 
cm : NULL), 665 _hr_claimer(n_workers), 666 _suspendible(suspendible) 667 { } 668 669 void work(uint worker_id) { 670 SuspendibleThreadSetJoiner sts_join(_suspendible); 671 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id); 672 } 673 674 bool is_complete() { 675 return _cl.is_complete(); 676 } 677 }; 678 679 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) { 680 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint."); 681 682 size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor(); 683 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size(); 684 685 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers()); 686 687 G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield); 688 689 log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks); 690 workers->run_task(&cl, num_workers); 691 guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding."); 692 } 693 694 void G1ConcurrentMark::cleanup_for_next_mark() { 695 // Make sure that the concurrent mark thread looks to still be in 696 // the current cycle. 697 guarantee(cm_thread()->during_cycle(), "invariant"); 698 699 // We are finishing up the current cycle by clearing the next 700 // marking bitmap and getting it ready for the next cycle. During 701 // this time no other cycle can start. So, let's make sure that this 702 // is the case. 703 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant"); 704 705 clear_bitmap(_next_mark_bitmap, _concurrent_workers, true); 706 707 // Repeat the asserts from above. 708 guarantee(cm_thread()->during_cycle(), "invariant"); 709 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant"); 710 } 711 712 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) { 713 assert_at_safepoint_on_vm_thread(); 714 clear_bitmap(_prev_mark_bitmap, workers, false); 715 } 716 717 class CheckBitmapClearHRClosure : public HeapRegionClosure { 718 G1CMBitMap* _bitmap; 719 public: 720 CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) { 721 } 722 723 virtual bool do_heap_region(HeapRegion* r) { 724 // This closure can be called concurrently to the mutator, so we must make sure 725 // that the result of the getNextMarkedWordAddress() call is compared to the 726 // value passed to it as limit to detect any found bits. 727 // end never changes in G1. 728 HeapWord* end = r->end(); 729 return _bitmap->get_next_marked_addr(r->bottom(), end) != end; 730 } 731 }; 732 733 bool G1ConcurrentMark::next_mark_bitmap_is_clear() { 734 CheckBitmapClearHRClosure cl(_next_mark_bitmap); 735 _g1h->heap_region_iterate(&cl); 736 return cl.is_complete(); 737 } 738 739 class NoteStartOfMarkHRClosure : public HeapRegionClosure { 740 public: 741 bool do_heap_region(HeapRegion* r) { 742 r->note_start_of_marking(); 743 return false; 744 } 745 }; 746 747 void G1ConcurrentMark::pre_initial_mark() { 748 // Initialize marking structures. This has to be done in a STW phase. 749 reset(); 750 751 // For each region note start of marking. 
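// note_start_of_marking() records the region's current top as its next
// top-at-mark-start (TAMS) and resets its next marked bytes to zero.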
752 NoteStartOfMarkHRClosure startcl; 753 _g1h->heap_region_iterate(&startcl); 754 } 755 756 757 void G1ConcurrentMark::post_initial_mark() { 758 // Start Concurrent Marking weak-reference discovery. 759 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 760 // enable ("weak") refs discovery 761 rp->enable_discovery(); 762 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 763 764 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 765 // This is the start of the marking cycle, we're expected all 766 // threads to have SATB queues with active set to false. 767 satb_mq_set.set_active_all_threads(true, /* new active value */ 768 false /* expected_active */); 769 770 _root_regions.prepare_for_scan(); 771 772 // update_g1_committed() will be called at the end of an evac pause 773 // when marking is on. So, it's also called at the end of the 774 // initial-mark pause to update the heap end, if the heap expands 775 // during it. No need to call it here. 776 } 777 778 /* 779 * Notice that in the next two methods, we actually leave the STS 780 * during the barrier sync and join it immediately afterwards. If we 781 * do not do this, the following deadlock can occur: one thread could 782 * be in the barrier sync code, waiting for the other thread to also 783 * sync up, whereas another one could be trying to yield, while also 784 * waiting for the other threads to sync up too. 785 * 786 * Note, however, that this code is also used during remark and in 787 * this case we should not attempt to leave / enter the STS, otherwise 788 * we'll either hit an assert (debug / fastdebug) or deadlock 789 * (product). So we should only leave / enter the STS if we are 790 * operating concurrently. 791 * 792 * Because the thread that does the sync barrier has left the STS, it 793 * is possible to be suspended for a Full GC or an evacuation pause 794 * could occur. This is actually safe, since the entering the sync 795 * barrier is one of the last things do_marking_step() does, and it 796 * doesn't manipulate any data structures afterwards. 797 */ 798 799 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 800 bool barrier_aborted; 801 { 802 SuspendibleThreadSetLeaver sts_leave(concurrent()); 803 barrier_aborted = !_first_overflow_barrier_sync.enter(); 804 } 805 806 // at this point everyone should have synced up and not be doing any 807 // more work 808 809 if (barrier_aborted) { 810 // If the barrier aborted we ignore the overflow condition and 811 // just abort the whole marking phase as quickly as possible. 
812 return; 813 } 814 } 815 816 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) { 817 SuspendibleThreadSetLeaver sts_leave(concurrent()); 818 _second_overflow_barrier_sync.enter(); 819 820 // at this point everything should be re-initialized and ready to go 821 } 822 823 class G1CMConcurrentMarkingTask : public AbstractGangTask { 824 G1ConcurrentMark* _cm; 825 public: 826 void work(uint worker_id) { 827 assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread"); 828 ResourceMark rm; 829 830 double start_vtime = os::elapsedVTime(); 831 832 { 833 SuspendibleThreadSetJoiner sts_join; 834 835 assert(worker_id < _cm->active_tasks(), "invariant"); 836 837 G1CMTask* task = _cm->task(worker_id); 838 task->record_start_time(); 839 if (!_cm->has_aborted()) { 840 do { 841 task->do_marking_step(G1ConcMarkStepDurationMillis, 842 true /* do_termination */, 843 false /* is_serial*/); 844 845 _cm->do_yield_check(); 846 } while (!_cm->has_aborted() && task->has_aborted()); 847 } 848 task->record_end_time(); 849 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant"); 850 } 851 852 double end_vtime = os::elapsedVTime(); 853 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime); 854 } 855 856 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) : 857 AbstractGangTask("Concurrent Mark"), _cm(cm) { } 858 859 ~G1CMConcurrentMarkingTask() { } 860 }; 861 862 uint G1ConcurrentMark::calc_active_marking_workers() { 863 uint result = 0; 864 if (!UseDynamicNumberOfGCThreads || 865 (!FLAG_IS_DEFAULT(ConcGCThreads) && 866 !ForceDynamicNumberOfGCThreads)) { 867 result = _max_concurrent_workers; 868 } else { 869 result = 870 AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers, 871 1, /* Minimum workers */ 872 _num_concurrent_workers, 873 Threads::number_of_non_daemon_threads()); 874 // Don't scale the result down by scale_concurrent_workers() because 875 // that scaling has already gone into "_max_concurrent_workers". 876 } 877 assert(result > 0 && result <= _max_concurrent_workers, 878 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u", 879 _max_concurrent_workers, result); 880 return result; 881 } 882 883 void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) { 884 // Currently, only survivors can be root regions. 
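// Their next top-at-mark-start is at bottom, so every object in a survivor
// region is implicitly live for this marking; we only need to trace the
// references they contain (see G1RootRegionScanClosure).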
885 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 886 G1RootRegionScanClosure cl(_g1h, this, worker_id); 887 888 const uintx interval = PrefetchScanIntervalInBytes; 889 HeapWord* curr = hr->bottom(); 890 const HeapWord* end = hr->top(); 891 while (curr < end) { 892 Prefetch::read(curr, interval); 893 oop obj = oop(curr); 894 int size = obj->oop_iterate_size(&cl); 895 assert(size == obj->size(), "sanity"); 896 curr += size; 897 } 898 } 899 900 class G1CMRootRegionScanTask : public AbstractGangTask { 901 G1ConcurrentMark* _cm; 902 public: 903 G1CMRootRegionScanTask(G1ConcurrentMark* cm) : 904 AbstractGangTask("G1 Root Region Scan"), _cm(cm) { } 905 906 void work(uint worker_id) { 907 assert(Thread::current()->is_ConcurrentGC_thread(), 908 "this should only be done by a conc GC thread"); 909 910 G1CMRootRegions* root_regions = _cm->root_regions(); 911 HeapRegion* hr = root_regions->claim_next(); 912 while (hr != NULL) { 913 _cm->scan_root_region(hr, worker_id); 914 hr = root_regions->claim_next(); 915 } 916 } 917 }; 918 919 void G1ConcurrentMark::scan_root_regions() { 920 // scan_in_progress() will have been set to true only if there was 921 // at least one root region to scan. So, if it's false, we 922 // should not attempt to do any further work. 923 if (root_regions()->scan_in_progress()) { 924 assert(!has_aborted(), "Aborting before root region scanning is finished not supported."); 925 926 _num_concurrent_workers = MIN2(calc_active_marking_workers(), 927 // We distribute work on a per-region basis, so starting 928 // more threads than that is useless. 929 root_regions()->num_root_regions()); 930 assert(_num_concurrent_workers <= _max_concurrent_workers, 931 "Maximum number of marking threads exceeded"); 932 933 G1CMRootRegionScanTask task(this); 934 log_debug(gc, ergo)("Running %s using %u workers for %u work units.", 935 task.name(), _num_concurrent_workers, root_regions()->num_root_regions()); 936 _concurrent_workers->run_task(&task, _num_concurrent_workers); 937 938 // It's possible that has_aborted() is true here without actually 939 // aborting the survivor scan earlier. This is OK as it's 940 // mainly used for sanity checking. 941 root_regions()->scan_finished(); 942 } 943 } 944 945 void G1ConcurrentMark::concurrent_cycle_start() { 946 _gc_timer_cm->register_gc_start(); 947 948 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start()); 949 950 _g1h->trace_heap_before_gc(_gc_tracer_cm); 951 } 952 953 void G1ConcurrentMark::concurrent_cycle_end() { 954 _g1h->collector_state()->set_clearing_next_bitmap(false); 955 956 _g1h->trace_heap_after_gc(_gc_tracer_cm); 957 958 if (has_aborted()) { 959 log_info(gc, marking)("Concurrent Mark Abort"); 960 _gc_tracer_cm->report_concurrent_mode_failure(); 961 } 962 963 _gc_timer_cm->register_gc_end(); 964 965 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions()); 966 } 967 968 void G1ConcurrentMark::mark_from_roots() { 969 _restart_for_overflow = false; 970 971 _num_concurrent_workers = calc_active_marking_workers(); 972 973 uint active_workers = MAX2(1U, _num_concurrent_workers); 974 975 // Setting active workers is not guaranteed since fewer 976 // worker threads may currently exist and more may not be 977 // available. 
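// update_active_workers() returns the number of workers actually activated;
// that value is what gets passed on to the parallel task terminator and the
// sync barriers via set_concurrency_and_phase() below.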
978 active_workers = _concurrent_workers->update_active_workers(active_workers); 979 log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers()); 980 981 // Parallel task terminator is set in "set_concurrency_and_phase()" 982 set_concurrency_and_phase(active_workers, true /* concurrent */); 983 984 G1CMConcurrentMarkingTask marking_task(this); 985 _concurrent_workers->run_task(&marking_task); 986 print_stats(); 987 } 988 989 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) { 990 G1HeapVerifier* verifier = _g1h->verifier(); 991 992 verifier->verify_region_sets_optional(); 993 994 if (VerifyDuringGC) { 995 GCTraceTime(Debug, gc, phases) trace(caller, _gc_timer_cm); 996 997 size_t const BufLen = 512; 998 char buffer[BufLen]; 999 1000 jio_snprintf(buffer, BufLen, "During GC (%s)", caller); 1001 verifier->verify(type, vo, buffer); 1002 } 1003 1004 verifier->check_bitmaps(caller); 1005 } 1006 1007 class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure { 1008 G1CollectedHeap* _g1h; 1009 G1ConcurrentMark* _cm; 1010 1011 G1PrintRegionLivenessInfoClosure _cl; 1012 1013 uint _num_regions_selected_for_rebuild; // The number of regions actually selected for rebuild. 1014 1015 void update_remset_before_rebuild(HeapRegion * hr) { 1016 G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker(); 1017 1018 size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize; 1019 bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes); 1020 if (selected_for_rebuild) { 1021 _num_regions_selected_for_rebuild++; 1022 } 1023 _cm->update_top_at_rebuild_start(hr); 1024 } 1025 1026 void distribute_marked_bytes(HeapRegion* hr, size_t marked_words) { 1027 uint const region_idx = hr->hrm_index(); 1028 uint num_regions_in_humongous = (uint)G1CollectedHeap::humongous_obj_size_in_regions(marked_words); 1029 1030 for (uint i = region_idx; i < (region_idx + num_regions_in_humongous); i++) { 1031 HeapRegion* const r = _g1h->region_at(i); 1032 size_t const words_to_add = MIN2(HeapRegion::GrainWords, marked_words); 1033 assert(words_to_add > 0, "Out of space to distribute before end of humongous object in region %u (starts %u)", i, region_idx); 1034 1035 log_trace(gc, marking)("Adding " SIZE_FORMAT " words to humongous region %u (%s)", 1036 words_to_add, i, r->get_type_str()); 1037 r->add_to_marked_bytes(words_to_add * HeapWordSize); 1038 marked_words -= words_to_add; 1039 } 1040 assert(marked_words == 0, 1041 SIZE_FORMAT " words left after distributing space across %u regions", 1042 marked_words, num_regions_in_humongous); 1043 } 1044 1045 void update_marked_bytes(HeapRegion* hr) { 1046 uint const region_idx = hr->hrm_index(); 1047 size_t marked_words = _cm->liveness(region_idx); 1048 // The marking attributes the object's size completely to the humongous starts 1049 // region. We need to distribute this value across the entire set of regions a 1050 // humongous object spans. 
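// For example, assuming HeapRegion::GrainWords is 1M words, a fully-live
// humongous object of 2.5M words spanning three regions gets 1M, 1M and 0.5M
// live words attributed to its first, second and third region, respectively
// (see distribute_marked_bytes() above).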
1051 if (hr->is_humongous()) { 1052 assert(hr->is_starts_humongous() || marked_words == 0, 1053 "Should not have marked words " SIZE_FORMAT " in non-starts humongous region %u (%s)", 1054 marked_words, region_idx, hr->get_type_str()); 1055 1056 if (marked_words > 0) { 1057 distribute_marked_bytes(hr, marked_words); 1058 } 1059 } else { 1060 log_trace(gc, marking)("Adding " SIZE_FORMAT " words to region %u (%s)", marked_words, region_idx, hr->get_type_str()); 1061 hr->add_to_marked_bytes(marked_words * HeapWordSize); 1062 } 1063 } 1064 1065 public: 1066 G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm) : 1067 _g1h(g1h), _cm(cm), _cl("Post-Marking"), _num_regions_selected_for_rebuild(0) { } 1068 1069 virtual bool do_heap_region(HeapRegion* r) { 1070 update_remset_before_rebuild(r); 1071 update_marked_bytes(r); 1072 if (log_is_enabled(Trace, gc, liveness)) { 1073 _cl.do_heap_region(r); 1074 } 1075 r->note_end_of_marking(); 1076 return false; 1077 } 1078 1079 uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; } 1080 }; 1081 1082 class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure { 1083 G1CollectedHeap* _g1h; 1084 public: 1085 G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { } 1086 1087 virtual bool do_heap_region(HeapRegion* r) { 1088 _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r); 1089 return false; 1090 } 1091 }; 1092 1093 void G1ConcurrentMark::remark() { 1094 assert_at_safepoint_on_vm_thread(); 1095 1096 // If a full collection has happened, we should not continue. However we might 1097 // have ended up here as the Remark VM operation has been scheduled already. 1098 if (has_aborted()) { 1099 return; 1100 } 1101 1102 G1Policy* g1p = _g1h->g1_policy(); 1103 g1p->record_concurrent_mark_remark_start(); 1104 1105 double start = os::elapsedTime(); 1106 1107 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before"); 1108 1109 { 1110 GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm); 1111 finalize_marking(); 1112 } 1113 1114 double mark_work_end = os::elapsedTime(); 1115 1116 bool const mark_finished = !has_overflown(); 1117 if (mark_finished) { 1118 weak_refs_work(false /* clear_all_soft_refs */); 1119 1120 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1121 // We're done with marking. 1122 // This is the end of the marking cycle, we're expected all 1123 // threads to have SATB queues with active set to true. 1124 satb_mq_set.set_active_all_threads(false, /* new active value */ 1125 true /* expected_active */); 1126 1127 { 1128 GCTraceTime(Debug, gc, phases)("Flush Task Caches"); 1129 flush_all_task_caches(); 1130 } 1131 1132 // Install newly created mark bitmap as "prev". 1133 swap_mark_bitmaps(); 1134 { 1135 GCTraceTime(Debug, gc, phases)("Update Remembered Set Tracking Before Rebuild"); 1136 G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this); 1137 _g1h->heap_region_iterate(&cl); 1138 log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u", 1139 _g1h->num_regions(), cl.num_selected_for_rebuild()); 1140 } 1141 1142 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark after"); 1143 1144 assert(!restart_for_overflow(), "sanity"); 1145 // Completely reset the marking state since marking completed 1146 reset_at_marking_complete(); 1147 } else { 1148 // We overflowed. Restart concurrent marking. 
1149 _restart_for_overflow = true; 1150 1151 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow"); 1152 1153 // Clear the marking state because we will be restarting 1154 // marking due to overflowing the global mark stack. 1155 reset_marking_for_restart(); 1156 } 1157 1158 { 1159 GCTraceTime(Debug, gc, phases)("Report Object Count"); 1160 report_object_count(mark_finished); 1161 } 1162 1163 // Statistics 1164 double now = os::elapsedTime(); 1165 _remark_mark_times.add((mark_work_end - start) * 1000.0); 1166 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1167 _remark_times.add((now - start) * 1000.0); 1168 1169 g1p->record_concurrent_mark_remark_end(); 1170 } 1171 1172 class G1CleanupTask : public AbstractGangTask { 1173 // Per-region work during the Cleanup pause. 1174 class G1CleanupRegionsClosure : public HeapRegionClosure { 1175 G1CollectedHeap* _g1h; 1176 size_t _freed_bytes; 1177 FreeRegionList* _local_cleanup_list; 1178 uint _old_regions_removed; 1179 uint _humongous_regions_removed; 1180 HRRSCleanupTask* _hrrs_cleanup_task; 1181 1182 public: 1183 G1CleanupRegionsClosure(G1CollectedHeap* g1, 1184 FreeRegionList* local_cleanup_list, 1185 HRRSCleanupTask* hrrs_cleanup_task) : 1186 _g1h(g1), 1187 _freed_bytes(0), 1188 _local_cleanup_list(local_cleanup_list), 1189 _old_regions_removed(0), 1190 _humongous_regions_removed(0), 1191 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1192 1193 size_t freed_bytes() { return _freed_bytes; } 1194 const uint old_regions_removed() { return _old_regions_removed; } 1195 const uint humongous_regions_removed() { return _humongous_regions_removed; } 1196 1197 bool do_heap_region(HeapRegion *hr) { 1198 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) { 1199 _freed_bytes += hr->used(); 1200 hr->set_containing_set(NULL); 1201 if (hr->is_humongous()) { 1202 _humongous_regions_removed++; 1203 _g1h->free_humongous_region(hr, _local_cleanup_list); 1204 } else { 1205 _old_regions_removed++; 1206 _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */); 1207 } 1208 hr->clear_cardtable(); 1209 _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index()); 1210 log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom())); 1211 } else { 1212 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1213 } 1214 1215 return false; 1216 } 1217 }; 1218 1219 G1CollectedHeap* _g1h; 1220 FreeRegionList* _cleanup_list; 1221 HeapRegionClaimer _hrclaimer; 1222 1223 public: 1224 G1CleanupTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1225 AbstractGangTask("G1 Cleanup"), 1226 _g1h(g1h), 1227 _cleanup_list(cleanup_list), 1228 _hrclaimer(n_workers) { 1229 1230 HeapRegionRemSet::reset_for_cleanup_tasks(); 1231 } 1232 1233 void work(uint worker_id) { 1234 FreeRegionList local_cleanup_list("Local Cleanup List"); 1235 HRRSCleanupTask hrrs_cleanup_task; 1236 G1CleanupRegionsClosure cl(_g1h, 1237 &local_cleanup_list, 1238 &hrrs_cleanup_task); 1239 _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id); 1240 assert(cl.is_complete(), "Shouldn't have aborted!"); 1241 1242 // Now update the old/humongous region sets 1243 _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed()); 1244 { 1245 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1246 
_g1h->decrement_summary_bytes(cl.freed_bytes()); 1247 1248 _cleanup_list->add_ordered(&local_cleanup_list); 1249 assert(local_cleanup_list.is_empty(), "post-condition"); 1250 1251 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1252 } 1253 } 1254 }; 1255 1256 void G1ConcurrentMark::reclaim_empty_regions() { 1257 WorkGang* workers = _g1h->workers(); 1258 FreeRegionList empty_regions_list("Empty Regions After Mark List"); 1259 1260 G1CleanupTask cl(_g1h, &empty_regions_list, workers->active_workers()); 1261 workers->run_task(&cl); 1262 1263 if (!empty_regions_list.is_empty()) { 1264 log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length()); 1265 // Now print the empty regions list. 1266 G1HRPrinter* hrp = _g1h->hr_printer(); 1267 if (hrp->is_active()) { 1268 FreeRegionListIterator iter(&empty_regions_list); 1269 while (iter.more_available()) { 1270 HeapRegion* hr = iter.get_next(); 1271 hrp->cleanup(hr); 1272 } 1273 } 1274 // And actually make them available. 1275 _g1h->prepend_to_freelist(&empty_regions_list); 1276 } 1277 } 1278 1279 void G1ConcurrentMark::cleanup() { 1280 assert_at_safepoint_on_vm_thread(); 1281 1282 // If a full collection has happened, we shouldn't do this. 1283 if (has_aborted()) { 1284 return; 1285 } 1286 1287 G1Policy* g1p = _g1h->g1_policy(); 1288 g1p->record_concurrent_mark_cleanup_start(); 1289 1290 double start = os::elapsedTime(); 1291 1292 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup before"); 1293 1294 { 1295 GCTraceTime(Debug, gc, phases)("Update Remembered Set Tracking After Rebuild"); 1296 G1UpdateRemSetTrackingAfterRebuild cl(_g1h); 1297 _g1h->heap_region_iterate(&cl); 1298 } 1299 1300 if (log_is_enabled(Trace, gc, liveness)) { 1301 G1PrintRegionLivenessInfoClosure cl("Post-Cleanup"); 1302 _g1h->heap_region_iterate(&cl); 1303 } 1304 1305 { 1306 GCTraceTime(Debug, gc, phases)("Reclaim Empty Regions"); 1307 reclaim_empty_regions(); 1308 } 1309 1310 // Cleanup will have freed any regions completely full of garbage. 1311 // Update the soft reference policy with the new heap occupancy. 1312 Universe::update_heap_info_at_gc(); 1313 1314 // Clean out dead classes and update Metaspace sizes. 1315 if (ClassUnloadingWithConcurrentMark) { 1316 GCTraceTime(Debug, gc, phases)("Purge Metaspace"); 1317 ClassLoaderDataGraph::purge(); 1318 } 1319 MetaspaceGC::compute_new_size(); 1320 1321 // We reclaimed old regions so we should calculate the sizes to make 1322 // sure we update the old gen/space data. 1323 _g1h->g1mm()->update_sizes(); 1324 1325 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after"); 1326 1327 // We need to make this be a "collection" so any collection pause that 1328 // races with it goes around and waits for Cleanup to finish. 
1329 _g1h->increment_total_collections(); 1330 1331 // Local statistics 1332 double recent_cleanup_time = (os::elapsedTime() - start); 1333 _total_cleanup_time += recent_cleanup_time; 1334 _cleanup_times.add(recent_cleanup_time); 1335 1336 { 1337 GCTraceTime(Debug, gc, phases)("Finalize Concurrent Mark Cleanup"); 1338 _g1h->g1_policy()->record_concurrent_mark_cleanup_end(); 1339 } 1340 } 1341 1342 // Supporting Object and Oop closures for reference discovery 1343 // and processing in during marking 1344 1345 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1346 HeapWord* addr = (HeapWord*)obj; 1347 return addr != NULL && 1348 (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj)); 1349 } 1350 1351 // 'Keep Alive' oop closure used by both serial parallel reference processing. 1352 // Uses the G1CMTask associated with a worker thread (for serial reference 1353 // processing the G1CMTask for worker 0 is used) to preserve (mark) and 1354 // trace referent objects. 1355 // 1356 // Using the G1CMTask and embedded local queues avoids having the worker 1357 // threads operating on the global mark stack. This reduces the risk 1358 // of overflowing the stack - which we would rather avoid at this late 1359 // state. Also using the tasks' local queues removes the potential 1360 // of the workers interfering with each other that could occur if 1361 // operating on the global stack. 1362 1363 class G1CMKeepAliveAndDrainClosure : public OopClosure { 1364 G1ConcurrentMark* _cm; 1365 G1CMTask* _task; 1366 int _ref_counter_limit; 1367 int _ref_counter; 1368 bool _is_serial; 1369 public: 1370 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) : 1371 _cm(cm), _task(task), _is_serial(is_serial), 1372 _ref_counter_limit(G1RefProcDrainInterval) { 1373 assert(_ref_counter_limit > 0, "sanity"); 1374 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1375 _ref_counter = _ref_counter_limit; 1376 } 1377 1378 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1379 virtual void do_oop( oop* p) { do_oop_work(p); } 1380 1381 template <class T> void do_oop_work(T* p) { 1382 if (!_cm->has_overflown()) { 1383 _task->deal_with_reference(p); 1384 _ref_counter--; 1385 1386 if (_ref_counter == 0) { 1387 // We have dealt with _ref_counter_limit references, pushing them 1388 // and objects reachable from them on to the local stack (and 1389 // possibly the global stack). Call G1CMTask::do_marking_step() to 1390 // process these entries. 1391 // 1392 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if 1393 // there's nothing more to do (i.e. we're done with the entries that 1394 // were pushed as a result of the G1CMTask::deal_with_reference() calls 1395 // above) or we overflow. 1396 // 1397 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted() 1398 // flag while there may still be some work to do. (See the comment at 1399 // the beginning of G1CMTask::do_marking_step() for those conditions - 1400 // one of which is reaching the specified time target.) It is only 1401 // when G1CMTask::do_marking_step() returns without setting the 1402 // has_aborted() flag that the marking step has completed. 
1403 do { 1404 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1405 _task->do_marking_step(mark_step_duration_ms, 1406 false /* do_termination */, 1407 _is_serial); 1408 } while (_task->has_aborted() && !_cm->has_overflown()); 1409 _ref_counter = _ref_counter_limit; 1410 } 1411 } 1412 } 1413 }; 1414 1415 // 'Drain' oop closure used by both serial and parallel reference processing. 1416 // Uses the G1CMTask associated with a given worker thread (for serial 1417 // reference processing the G1CMtask for worker 0 is used). Calls the 1418 // do_marking_step routine, with an unbelievably large timeout value, 1419 // to drain the marking data structures of the remaining entries 1420 // added by the 'keep alive' oop closure above. 1421 1422 class G1CMDrainMarkingStackClosure : public VoidClosure { 1423 G1ConcurrentMark* _cm; 1424 G1CMTask* _task; 1425 bool _is_serial; 1426 public: 1427 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) : 1428 _cm(cm), _task(task), _is_serial(is_serial) { 1429 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1430 } 1431 1432 void do_void() { 1433 do { 1434 // We call G1CMTask::do_marking_step() to completely drain the local 1435 // and global marking stacks of entries pushed by the 'keep alive' 1436 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 1437 // 1438 // G1CMTask::do_marking_step() is called in a loop, which we'll exit 1439 // if there's nothing more to do (i.e. we've completely drained the 1440 // entries that were pushed as a a result of applying the 'keep alive' 1441 // closure to the entries on the discovered ref lists) or we overflow 1442 // the global marking stack. 1443 // 1444 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted() 1445 // flag while there may still be some work to do. (See the comment at 1446 // the beginning of G1CMTask::do_marking_step() for those conditions - 1447 // one of which is reaching the specified time target.) It is only 1448 // when G1CMTask::do_marking_step() returns without setting the 1449 // has_aborted() flag that the marking step has completed. 1450 1451 _task->do_marking_step(1000000000.0 /* something very large */, 1452 true /* do_termination */, 1453 _is_serial); 1454 } while (_task->has_aborted() && !_cm->has_overflown()); 1455 } 1456 }; 1457 1458 // Implementation of AbstractRefProcTaskExecutor for parallel 1459 // reference processing at the end of G1 concurrent marking 1460 1461 class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor { 1462 private: 1463 G1CollectedHeap* _g1h; 1464 G1ConcurrentMark* _cm; 1465 WorkGang* _workers; 1466 uint _active_workers; 1467 1468 public: 1469 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 1470 G1ConcurrentMark* cm, 1471 WorkGang* workers, 1472 uint n_workers) : 1473 _g1h(g1h), _cm(cm), 1474 _workers(workers), _active_workers(n_workers) { } 1475 1476 // Executes the given task using concurrent marking worker threads. 
1477 virtual void execute(ProcessTask& task); 1478 virtual void execute(EnqueueTask& task); 1479 }; 1480 1481 class G1CMRefProcTaskProxy : public AbstractGangTask { 1482 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1483 ProcessTask& _proc_task; 1484 G1CollectedHeap* _g1h; 1485 G1ConcurrentMark* _cm; 1486 1487 public: 1488 G1CMRefProcTaskProxy(ProcessTask& proc_task, 1489 G1CollectedHeap* g1h, 1490 G1ConcurrentMark* cm) : 1491 AbstractGangTask("Process reference objects in parallel"), 1492 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 1493 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1494 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 1495 } 1496 1497 virtual void work(uint worker_id) { 1498 ResourceMark rm; 1499 HandleMark hm; 1500 G1CMTask* task = _cm->task(worker_id); 1501 G1CMIsAliveClosure g1_is_alive(_g1h); 1502 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 1503 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 1504 1505 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1506 } 1507 }; 1508 1509 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 1510 assert(_workers != NULL, "Need parallel worker threads."); 1511 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1512 1513 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 1514 1515 // We need to reset the concurrency level before each 1516 // proxy task execution, so that the termination protocol 1517 // and overflow handling in G1CMTask::do_marking_step() knows 1518 // how many workers to wait for. 1519 _cm->set_concurrency(_active_workers); 1520 _workers->run_task(&proc_task_proxy); 1521 } 1522 1523 class G1CMRefEnqueueTaskProxy : public AbstractGangTask { 1524 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1525 EnqueueTask& _enq_task; 1526 1527 public: 1528 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 1529 AbstractGangTask("Enqueue reference objects in parallel"), 1530 _enq_task(enq_task) { } 1531 1532 virtual void work(uint worker_id) { 1533 _enq_task.work(worker_id); 1534 } 1535 }; 1536 1537 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 1538 assert(_workers != NULL, "Need parallel worker threads."); 1539 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1540 1541 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 1542 1543 // Not strictly necessary but... 1544 // 1545 // We need to reset the concurrency level before each 1546 // proxy task execution, so that the termination protocol 1547 // and overflow handling in G1CMTask::do_marking_step() knows 1548 // how many workers to wait for. 1549 _cm->set_concurrency(_active_workers); 1550 _workers->run_task(&enq_task_proxy); 1551 } 1552 1553 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) { 1554 ResourceMark rm; 1555 HandleMark hm; 1556 1557 // Is alive closure. 1558 G1CMIsAliveClosure g1_is_alive(_g1h); 1559 1560 // Inner scope to exclude the cleaning of the string and symbol 1561 // tables from the displayed time. 1562 { 1563 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm); 1564 1565 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1566 1567 // See the comment in G1CollectedHeap::ref_processing_init() 1568 // about how reference processing currently works in G1. 
1569 1570 // Set the soft reference policy 1571 rp->setup_policy(clear_all_soft_refs); 1572 assert(_global_mark_stack.is_empty(), "mark stack should be empty"); 1573 1574 // Instances of the 'Keep Alive' and 'Complete GC' closures used 1575 // in serial reference processing. Note these closures are also 1576 // used for serially processing (by the current thread) the 1577 // JNI references during parallel reference processing. 1578 // 1579 // These closures do not need to synchronize with the worker 1580 // threads involved in parallel reference processing as these 1581 // instances are executed serially by the current thread (i.e. 1582 // reference processing is not multi-threaded and is thus 1583 // performed by the current thread instead of a gang worker). 1584 // 1585 // The gang tasks involved in parallel reference processing create 1586 // their own instances of these closures, which do their own 1587 // synchronization among themselves. 1588 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 1589 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 1590 1591 // We need at least one active thread. If reference processing 1592 // is not multi-threaded we use the current (VMThread) thread, 1593 // otherwise we use the work gang from the G1CollectedHeap and 1594 // we utilize all the worker threads we can. 1595 bool processing_is_mt = rp->processing_is_mt(); 1596 uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U); 1597 active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U); 1598 1599 // Parallel processing task executor. 1600 G1CMRefProcTaskExecutor par_task_executor(_g1h, this, 1601 _g1h->workers(), active_workers); 1602 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 1603 1604 // Set the concurrency level. The phase was already set prior to 1605 // executing the remark task. 1606 set_concurrency(active_workers); 1607 1608 // Set the degree of MT processing here. If the discovery was done MT, 1609 // the number of threads involved during discovery could differ from 1610 // the number of active workers. This is OK as long as the discovered 1611 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 1612 rp->set_active_mt_degree(active_workers); 1613 1614 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q()); 1615 1616 // Process the weak references. 1617 const ReferenceProcessorStats& stats = 1618 rp->process_discovered_references(&g1_is_alive, 1619 &g1_keep_alive, 1620 &g1_drain_mark_stack, 1621 executor, 1622 &pt); 1623 _gc_tracer_cm->report_gc_reference_stats(stats); 1624 pt.print_all_references(); 1625 1626 // The do_oop work routines of the keep_alive and drain_marking_stack 1627 // oop closures will set the has_overflown flag if we overflow the 1628 // global marking stack.
1629 1630 assert(has_overflown() || _global_mark_stack.is_empty(), 1631 "Mark stack should be empty (unless it has overflown)"); 1632 1633 assert(rp->num_q() == active_workers, "why not"); 1634 1635 rp->enqueue_discovered_references(executor, &pt); 1636 1637 rp->verify_no_references_recorded(); 1638 1639 pt.print_enqueue_phase(); 1640 1641 assert(!rp->discovery_enabled(), "Post condition"); 1642 } 1643 1644 assert(has_overflown() || _global_mark_stack.is_empty(), 1645 "Mark stack should be empty (unless it has overflown)"); 1646 1647 { 1648 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm); 1649 WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl); 1650 } 1651 1652 if (has_overflown()) { 1653 // We cannot trust g1_is_alive if the marking stack overflowed 1654 return; 1655 } 1656 1657 assert(_global_mark_stack.is_empty(), "Marking should have completed"); 1658 1659 // Unload Klasses, String, Symbols, Code Cache, etc. 1660 if (ClassUnloadingWithConcurrentMark) { 1661 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm); 1662 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */); 1663 _g1h->complete_cleaning(&g1_is_alive, purged_classes); 1664 } else { 1665 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm); 1666 // No need to clean string table and symbol table as they are treated as strong roots when 1667 // class unloading is disabled. 1668 _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled()); 1669 } 1670 } 1671 1672 // When sampling object counts, we already swapped the mark bitmaps, so we need to use 1673 // the prev bitmap to determine liveness. 1674 class G1ObjectCountIsAliveClosure: public BoolObjectClosure { 1675 G1CollectedHeap* _g1; 1676 public: 1677 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) { } 1678 1679 bool do_object_b(oop obj) { 1680 HeapWord* addr = (HeapWord*)obj; 1681 return addr != NULL && 1682 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_dead(obj)); 1683 } 1684 }; 1685 1686 void G1ConcurrentMark::report_object_count(bool mark_completed) { 1687 // Depending on whether marking has completed, liveness needs to be determined 1688 // using either the next or the prev bitmap. 1689 if (mark_completed) { 1690 G1ObjectCountIsAliveClosure is_alive(_g1h); 1691 _gc_tracer_cm->report_object_count_after_gc(&is_alive); 1692 } else { 1693 G1CMIsAliveClosure is_alive(_g1h); 1694 _gc_tracer_cm->report_object_count_after_gc(&is_alive); 1695 } 1696 } 1697 1698 1699 void G1ConcurrentMark::swap_mark_bitmaps() { 1700 G1CMBitMap* temp = _prev_mark_bitmap; 1701 _prev_mark_bitmap = _next_mark_bitmap; 1702 _next_mark_bitmap = temp; 1703 _g1h->collector_state()->set_clearing_next_bitmap(true); 1704 } 1705 1706 // Closure for marking entries in SATB buffers. 1707 class G1CMSATBBufferClosure : public SATBBufferClosure { 1708 private: 1709 G1CMTask* _task; 1710 G1CollectedHeap* _g1h; 1711 1712 // This is very similar to G1CMTask::deal_with_reference, but with 1713 // more relaxed requirements for the argument, so this must be more 1714 // circumspect about treating the argument as an object.
1715 void do_entry(void* entry) const { 1716 _task->increment_refs_reached(); 1717 oop const obj = static_cast<oop>(entry); 1718 _task->make_reference_grey(obj); 1719 } 1720 1721 public: 1722 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h) 1723 : _task(task), _g1h(g1h) { } 1724 1725 virtual void do_buffer(void** buffer, size_t size) { 1726 for (size_t i = 0; i < size; ++i) { 1727 do_entry(buffer[i]); 1728 } 1729 } 1730 }; 1731 1732 class G1RemarkThreadsClosure : public ThreadClosure { 1733 G1CMSATBBufferClosure _cm_satb_cl; 1734 G1CMOopClosure _cm_cl; 1735 MarkingCodeBlobClosure _code_cl; 1736 int _thread_parity; 1737 1738 public: 1739 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) : 1740 _cm_satb_cl(task, g1h), 1741 _cm_cl(g1h, task), 1742 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 1743 _thread_parity(Threads::thread_claim_parity()) {} 1744 1745 void do_thread(Thread* thread) { 1746 if (thread->is_Java_thread()) { 1747 if (thread->claim_oops_do(true, _thread_parity)) { 1748 JavaThread* jt = (JavaThread*)thread; 1749 1750 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking; 1751 // however, oops reachable from nmethods have very complex lifecycles: 1752 // * Alive if on the stack of an executing method 1753 // * Weakly reachable otherwise 1754 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 1755 // live by the SATB invariant, but other oops recorded in nmethods may behave differently. 1756 jt->nmethods_do(&_code_cl); 1757 1758 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 1759 } 1760 } else if (thread->is_VM_thread()) { 1761 if (thread->claim_oops_do(true, _thread_parity)) { 1762 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 1763 } 1764 } 1765 } 1766 }; 1767 1768 class G1CMRemarkTask : public AbstractGangTask { 1769 G1ConcurrentMark* _cm; 1770 public: 1771 void work(uint worker_id) { 1772 G1CMTask* task = _cm->task(worker_id); 1773 task->record_start_time(); 1774 { 1775 ResourceMark rm; 1776 HandleMark hm; 1777 1778 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 1779 Threads::threads_do(&threads_f); 1780 } 1781 1782 do { 1783 task->do_marking_step(1000000000.0 /* something very large */, 1784 true /* do_termination */, 1785 false /* is_serial */); 1786 } while (task->has_aborted() && !_cm->has_overflown()); 1787 // If we overflow, then we do not want to restart. We instead 1788 // want to abort remark and do concurrent marking again. 1789 task->record_end_time(); 1790 } 1791 1792 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) : 1793 AbstractGangTask("Par Remark"), _cm(cm) { 1794 _cm->terminator()->reset_for_reuse(active_workers); 1795 } 1796 }; 1797 1798 void G1ConcurrentMark::finalize_marking() { 1799 ResourceMark rm; 1800 HandleMark hm; 1801 1802 _g1h->ensure_parsability(false); 1803 1804 // this is remark, so we'll use up all active threads 1805 uint active_workers = _g1h->workers()->active_workers(); 1806 set_concurrency_and_phase(active_workers, false /* concurrent */); 1807 // Leave _parallel_marking_threads at its 1808 // value originally calculated in the G1ConcurrentMark 1809 // constructor and pass values of the active workers 1810 // through the gang in the task.
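  //
  // For orientation only, this is roughly what running the remark task below
  // amounts to per worker (the authoritative code is G1CMRemarkTask::work()
  // above): replay the per-thread SATB buffers and nmethod roots via
  // G1RemarkThreadsClosure, then finish marking with termination enabled:
  //
  //   G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
  //   Threads::threads_do(&threads_f);
  //   do {
  //     task->do_marking_step(1000000000.0 /* something very large */,
  //                           true /* do_termination */,
  //                           false /* is_serial */);
  //   } while (task->has_aborted() && !_cm->has_overflown());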
1811 1812 { 1813 StrongRootsScope srs(active_workers); 1814 1815 G1CMRemarkTask remarkTask(this, active_workers); 1816 // We will start all available threads, even if we decide that the 1817 // active_workers will be fewer. The extra ones will just bail out 1818 // immediately. 1819 _g1h->workers()->run_task(&remarkTask); 1820 } 1821 1822 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1823 guarantee(has_overflown() || 1824 satb_mq_set.completed_buffers_num() == 0, 1825 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT, 1826 BOOL_TO_STR(has_overflown()), 1827 satb_mq_set.completed_buffers_num()); 1828 1829 print_stats(); 1830 } 1831 1832 void G1ConcurrentMark::flush_all_task_caches() { 1833 size_t hits = 0; 1834 size_t misses = 0; 1835 for (uint i = 0; i < _max_num_tasks; i++) { 1836 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache(); 1837 hits += stats.first; 1838 misses += stats.second; 1839 } 1840 size_t sum = hits + misses; 1841 log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf", 1842 hits, misses, percent_of(hits, sum)); 1843 } 1844 1845 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) { 1846 _prev_mark_bitmap->clear_range(mr); 1847 } 1848 1849 HeapRegion* 1850 G1ConcurrentMark::claim_region(uint worker_id) { 1851 // "checkpoint" the finger 1852 HeapWord* finger = _finger; 1853 1854 while (finger < _heap.end()) { 1855 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 1856 1857 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 1858 // Make sure that the reads below do not float before loading curr_region. 1859 OrderAccess::loadload(); 1860 // Above heap_region_containing may return NULL as we always scan claim 1861 // until the end of the heap. In this case, just jump to the next region. 1862 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 1863 1864 // Is the gap between reading the finger and doing the CAS too long? 1865 HeapWord* res = Atomic::cmpxchg(end, &_finger, finger); 1866 if (res == finger && curr_region != NULL) { 1867 // we succeeded 1868 HeapWord* bottom = curr_region->bottom(); 1869 HeapWord* limit = curr_region->next_top_at_mark_start(); 1870 1871 // notice that _finger == end cannot be guaranteed here since, 1872 // someone else might have moved the finger even further 1873 assert(_finger >= end, "the finger should have moved forward"); 1874 1875 if (limit > bottom) { 1876 return curr_region; 1877 } else { 1878 assert(limit == bottom, 1879 "the region limit should be at bottom"); 1880 // we return NULL and the caller should try calling 1881 // claim_region() again. 
1882 return NULL; 1883 } 1884 } else { 1885 assert(_finger > finger, "the finger should have moved forward"); 1886 // read it again 1887 finger = _finger; 1888 } 1889 } 1890 1891 return NULL; 1892 } 1893 1894 #ifndef PRODUCT 1895 class VerifyNoCSetOops { 1896 G1CollectedHeap* _g1h; 1897 const char* _phase; 1898 int _info; 1899 1900 public: 1901 VerifyNoCSetOops(const char* phase, int info = -1) : 1902 _g1h(G1CollectedHeap::heap()), 1903 _phase(phase), 1904 _info(info) 1905 { } 1906 1907 void operator()(G1TaskQueueEntry task_entry) const { 1908 if (task_entry.is_array_slice()) { 1909 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1910 return; 1911 } 1912 guarantee(oopDesc::is_oop(task_entry.obj()), 1913 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1914 p2i(task_entry.obj()), _phase, _info); 1915 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1916 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1917 p2i(task_entry.obj()), _phase, _info); 1918 } 1919 }; 1920 1921 void G1ConcurrentMark::verify_no_cset_oops() { 1922 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1923 if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) { 1924 return; 1925 } 1926 1927 // Verify entries on the global mark stack 1928 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 1929 1930 // Verify entries on the task queues 1931 for (uint i = 0; i < _max_num_tasks; ++i) { 1932 G1CMTaskQueue* queue = _task_queues->queue(i); 1933 queue->iterate(VerifyNoCSetOops("Queue", i)); 1934 } 1935 1936 // Verify the global finger 1937 HeapWord* global_finger = finger(); 1938 if (global_finger != NULL && global_finger < _heap.end()) { 1939 // Since we always iterate over all regions, we might get a NULL HeapRegion 1940 // here. 1941 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 1942 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 1943 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 1944 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 1945 } 1946 1947 // Verify the task fingers 1948 assert(_num_concurrent_workers <= _max_num_tasks, "sanity"); 1949 for (uint i = 0; i < _num_concurrent_workers; ++i) { 1950 G1CMTask* task = _tasks[i]; 1951 HeapWord* task_finger = task->finger(); 1952 if (task_finger != NULL && task_finger < _heap.end()) { 1953 // See above note on the global finger verification. 1954 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 1955 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 1956 !task_hr->in_collection_set(), 1957 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 1958 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 1959 } 1960 } 1961 } 1962 #endif // PRODUCT 1963 1964 void G1ConcurrentMark::rebuild_rem_set_concurrently() { 1965 _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset); 1966 } 1967 1968 void G1ConcurrentMark::print_stats() { 1969 if (!log_is_enabled(Debug, gc, stats)) { 1970 return; 1971 } 1972 log_debug(gc, stats)("---------------------------------------------------------------------"); 1973 for (size_t i = 0; i < _num_active_tasks; ++i) { 1974 _tasks[i]->print_stats(); 1975 log_debug(gc, stats)("---------------------------------------------------------------------"); 1976 } 1977 } 1978 1979 void G1ConcurrentMark::concurrent_cycle_abort() { 1980 if (!cm_thread()->during_cycle() || _has_aborted) { 1981 // We haven't started a concurrent cycle or we have already aborted it. 
No need to do anything. 1982 return; 1983 } 1984 1985 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 1986 // concurrent bitmap clearing. 1987 { 1988 GCTraceTime(Debug, gc) debug("Clear Next Bitmap"); 1989 clear_bitmap(_next_mark_bitmap, _g1h->workers(), false); 1990 } 1991 // Note we cannot clear the previous marking bitmap here 1992 // since VerifyDuringGC verifies the objects marked during 1993 // a full GC against the previous bitmap. 1994 1995 // Empty mark stack 1996 reset_marking_for_restart(); 1997 for (uint i = 0; i < _max_num_tasks; ++i) { 1998 _tasks[i]->clear_region_fields(); 1999 } 2000 _first_overflow_barrier_sync.abort(); 2001 _second_overflow_barrier_sync.abort(); 2002 _has_aborted = true; 2003 2004 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2005 satb_mq_set.abandon_partial_marking(); 2006 // This can be called either during or outside marking; we'll read 2007 // the expected_active value from the SATB queue set. 2008 satb_mq_set.set_active_all_threads( 2009 false, /* new active value */ 2010 satb_mq_set.is_active() /* expected_active */); 2011 } 2012 2013 static void print_ms_time_info(const char* prefix, const char* name, 2014 NumberSeq& ns) { 2015 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2016 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2017 if (ns.num() > 0) { 2018 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2019 prefix, ns.sd(), ns.maximum()); 2020 } 2021 } 2022 2023 void G1ConcurrentMark::print_summary_info() { 2024 Log(gc, marking) log; 2025 if (!log.is_trace()) { 2026 return; 2027 } 2028 2029 log.trace(" Concurrent marking:"); 2030 print_ms_time_info(" ", "init marks", _init_times); 2031 print_ms_time_info(" ", "remarks", _remark_times); 2032 { 2033 print_ms_time_info(" ", "final marks", _remark_mark_times); 2034 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2035 2036 } 2037 print_ms_time_info(" ", "cleanups", _cleanup_times); 2038 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2039 _total_cleanup_time, (_cleanup_times.num() > 0 ?
_total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2040 log.trace(" Total stop_world time = %8.2f s.", 2041 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2042 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2043 cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum()); 2044 } 2045 2046 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2047 _concurrent_workers->print_worker_threads_on(st); 2048 } 2049 2050 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2051 _concurrent_workers->threads_do(tc); 2052 } 2053 2054 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2055 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2056 p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap)); 2057 _prev_mark_bitmap->print_on_error(st, " Prev Bits: "); 2058 _next_mark_bitmap->print_on_error(st, " Next Bits: "); 2059 } 2060 2061 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2062 ReferenceProcessor* result = g1h->ref_processor_cm(); 2063 assert(result != NULL, "CM reference processor should not be NULL"); 2064 return result; 2065 } 2066 2067 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2068 G1CMTask* task) 2069 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2070 _g1h(g1h), _task(task) 2071 { } 2072 2073 void G1CMTask::setup_for_region(HeapRegion* hr) { 2074 assert(hr != NULL, 2075 "claim_region() should have filtered out NULL regions"); 2076 _curr_region = hr; 2077 _finger = hr->bottom(); 2078 update_region_limit(); 2079 } 2080 2081 void G1CMTask::update_region_limit() { 2082 HeapRegion* hr = _curr_region; 2083 HeapWord* bottom = hr->bottom(); 2084 HeapWord* limit = hr->next_top_at_mark_start(); 2085 2086 if (limit == bottom) { 2087 // The region was collected underneath our feet. 2088 // We set the finger to bottom to ensure that the bitmap 2089 // iteration that will follow this will not do anything. 2090 // (this is not a condition that holds when we set the region up, 2091 // as the region is not supposed to be empty in the first place) 2092 _finger = bottom; 2093 } else if (limit >= _region_limit) { 2094 assert(limit >= _finger, "peace of mind"); 2095 } else { 2096 assert(limit < _region_limit, "only way to get here"); 2097 // This can happen under some pretty unusual circumstances. An 2098 // evacuation pause empties the region underneath our feet (NTAMS 2099 // at bottom). We then do some allocation in the region (NTAMS 2100 // stays at bottom), followed by the region being used as a GC 2101 // alloc region (NTAMS will move to top() and the objects 2102 // originally below it will be grayed). All objects now marked in 2103 // the region are explicitly grayed, if below the global finger, 2104 // and we do not need in fact to scan anything else. So, we simply 2105 // set _finger to be limit to ensure that the bitmap iteration 2106 // doesn't do anything. 2107 _finger = limit; 2108 } 2109 2110 _region_limit = limit; 2111 } 2112 2113 void G1CMTask::giveup_current_region() { 2114 assert(_curr_region != NULL, "invariant"); 2115 clear_region_fields(); 2116 } 2117 2118 void G1CMTask::clear_region_fields() { 2119 // Values for these three fields that indicate that we're not 2120 // holding on to a region. 
2121 _curr_region = NULL; 2122 _finger = NULL; 2123 _region_limit = NULL; 2124 } 2125 2126 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 2127 if (cm_oop_closure == NULL) { 2128 assert(_cm_oop_closure != NULL, "invariant"); 2129 } else { 2130 assert(_cm_oop_closure == NULL, "invariant"); 2131 } 2132 _cm_oop_closure = cm_oop_closure; 2133 } 2134 2135 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) { 2136 guarantee(next_mark_bitmap != NULL, "invariant"); 2137 _next_mark_bitmap = next_mark_bitmap; 2138 clear_region_fields(); 2139 2140 _calls = 0; 2141 _elapsed_time_ms = 0.0; 2142 _termination_time_ms = 0.0; 2143 _termination_start_time_ms = 0.0; 2144 2145 _mark_stats_cache.reset(); 2146 } 2147 2148 bool G1CMTask::should_exit_termination() { 2149 regular_clock_call(); 2150 // This is called when we are in the termination protocol. We should 2151 // quit if, for some reason, this task wants to abort or the global 2152 // stack is not empty (this means that we can get work from it). 2153 return !_cm->mark_stack_empty() || has_aborted(); 2154 } 2155 2156 void G1CMTask::reached_limit() { 2157 assert(_words_scanned >= _words_scanned_limit || 2158 _refs_reached >= _refs_reached_limit, 2159 "shouldn't have been called otherwise"); 2160 regular_clock_call(); 2161 } 2162 2163 void G1CMTask::regular_clock_call() { 2164 if (has_aborted()) { 2165 return; 2166 } 2167 2168 // First, we need to recalculate the words scanned and refs reached 2169 // limits for the next clock call. 2170 recalculate_limits(); 2171 2172 // During the regular clock call we do the following: 2173 2174 // (1) If an overflow has been flagged, then we abort. 2175 if (_cm->has_overflown()) { 2176 set_has_aborted(); 2177 return; 2178 } 2179 2180 // If we are not concurrent (i.e. we're doing remark) we don't need 2181 // to check anything else. The other steps are only needed during 2182 // the concurrent marking phase. 2183 if (!_cm->concurrent()) { 2184 return; 2185 } 2186 2187 // (2) If marking has been aborted for Full GC, then we also abort. 2188 if (_cm->has_aborted()) { 2189 set_has_aborted(); 2190 return; 2191 } 2192 2193 double curr_time_ms = os::elapsedVTime() * 1000.0; 2194 2195 // (4) We check whether we should yield. If we have to, then we abort. 2196 if (SuspendibleThreadSet::should_yield()) { 2197 // We should yield. To do this we abort the task. The caller is 2198 // responsible for yielding. 2199 set_has_aborted(); 2200 return; 2201 } 2202 2203 // (5) We check whether we've reached our time quota. If we have, 2204 // then we abort. 2205 double elapsed_time_ms = curr_time_ms - _start_time_ms; 2206 if (elapsed_time_ms > _time_target_ms) { 2207 set_has_aborted(); 2208 _has_timed_out = true; 2209 return; 2210 } 2211 2212 // (6) Finally, we check whether there are enough completed SATB 2213 // buffers available for processing. If there are, we abort.
2214 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2215 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2216 // we do need to process SATB buffers, we'll abort and restart 2217 // the marking task to do so 2218 set_has_aborted(); 2219 return; 2220 } 2221 } 2222 2223 void G1CMTask::recalculate_limits() { 2224 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2225 _words_scanned_limit = _real_words_scanned_limit; 2226 2227 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2228 _refs_reached_limit = _real_refs_reached_limit; 2229 } 2230 2231 void G1CMTask::decrease_limits() { 2232 // This is called when we believe that we're going to do an infrequent 2233 // operation which will increase the per byte scanned cost (i.e. move 2234 // entries to/from the global stack). It basically tries to decrease the 2235 // scanning limit so that the clock is called earlier. 2236 2237 _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4; 2238 _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4; 2239 } 2240 2241 void G1CMTask::move_entries_to_global_stack() { 2242 // Local array where we'll store the entries that will be popped 2243 // from the local queue. 2244 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2245 2246 size_t n = 0; 2247 G1TaskQueueEntry task_entry; 2248 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) { 2249 buffer[n] = task_entry; 2250 ++n; 2251 } 2252 if (n < G1CMMarkStack::EntriesPerChunk) { 2253 buffer[n] = G1TaskQueueEntry(); 2254 } 2255 2256 if (n > 0) { 2257 if (!_cm->mark_stack_push(buffer)) { 2258 set_has_aborted(); 2259 } 2260 } 2261 2262 // This operation was quite expensive, so decrease the limits. 2263 decrease_limits(); 2264 } 2265 2266 bool G1CMTask::get_entries_from_global_stack() { 2267 // Local array where we'll store the entries that will be popped 2268 // from the global stack. 2269 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2270 2271 if (!_cm->mark_stack_pop(buffer)) { 2272 return false; 2273 } 2274 2275 // We did actually pop at least one entry. 2276 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) { 2277 G1TaskQueueEntry task_entry = buffer[i]; 2278 if (task_entry.is_null()) { 2279 break; 2280 } 2281 assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj())); 2282 bool success = _task_queue->push(task_entry); 2283 // We only call this when the local queue is empty or under a 2284 // given target limit. So, we do not expect this push to fail. 2285 assert(success, "invariant"); 2286 } 2287 2288 // This operation was quite expensive, so decrease the limits 2289 decrease_limits(); 2290 return true; 2291 } 2292 2293 void G1CMTask::drain_local_queue(bool partially) { 2294 if (has_aborted()) { 2295 return; 2296 } 2297 2298 // Decide what the target size is, depending whether we're going to 2299 // drain it partially (so that other tasks can steal if they run out 2300 // of things to do) or totally (at the very end). 
2301 size_t target_size; 2302 if (partially) { 2303 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 2304 } else { 2305 target_size = 0; 2306 } 2307 2308 if (_task_queue->size() > target_size) { 2309 G1TaskQueueEntry entry; 2310 bool ret = _task_queue->pop_local(entry); 2311 while (ret) { 2312 scan_task_entry(entry); 2313 if (_task_queue->size() <= target_size || has_aborted()) { 2314 ret = false; 2315 } else { 2316 ret = _task_queue->pop_local(entry); 2317 } 2318 } 2319 } 2320 } 2321 2322 void G1CMTask::drain_global_stack(bool partially) { 2323 if (has_aborted()) { 2324 return; 2325 } 2326 2327 // We have a policy to drain the local queue before we attempt to 2328 // drain the global stack. 2329 assert(partially || _task_queue->size() == 0, "invariant"); 2330 2331 // Decide what the target size is, depending whether we're going to 2332 // drain it partially (so that other tasks can steal if they run out 2333 // of things to do) or totally (at the very end). 2334 // Notice that when draining the global mark stack partially, due to the raciness 2335 // of the mark stack size update we might in fact drop below the target. But, 2336 // this is not a problem. 2337 // In case of total draining, we simply process until the global mark stack is 2338 // totally empty, disregarding the size counter. 2339 if (partially) { 2340 size_t const target_size = _cm->partial_mark_stack_size_target(); 2341 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 2342 if (get_entries_from_global_stack()) { 2343 drain_local_queue(partially); 2344 } 2345 } 2346 } else { 2347 while (!has_aborted() && get_entries_from_global_stack()) { 2348 drain_local_queue(partially); 2349 } 2350 } 2351 } 2352 2353 // The SATB queue has several assumptions about whether to call the par or 2354 // non-par versions of the methods. This is why some of the code is 2355 // replicated. We should really get rid of the single-threaded version 2356 // of the code to simplify things. 2357 void G1CMTask::drain_satb_buffers() { 2358 if (has_aborted()) { 2359 return; 2360 } 2361 2362 // We set this so that the regular clock knows that we're in the 2363 // middle of draining buffers and doesn't set the abort flag when it 2364 // notices that SATB buffers are available for draining. It'd be 2365 // very counterproductive if it did that. :-) 2366 _draining_satb_buffers = true; 2367 2368 G1CMSATBBufferClosure satb_cl(this, _g1h); 2369 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2370 2371 // This keeps claiming and applying the closure to completed buffers 2372 // until we run out of buffers or we need to abort.
2373 while (!has_aborted() && 2374 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2375 regular_clock_call(); 2376 } 2377 2378 _draining_satb_buffers = false; 2379 2380 assert(has_aborted() || 2381 _cm->concurrent() || 2382 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2383 2384 // again, this was a potentially expensive operation, decrease the 2385 // limits to get the regular clock call early 2386 decrease_limits(); 2387 } 2388 2389 void G1CMTask::clear_mark_stats_cache(uint region_idx) { 2390 _mark_stats_cache.reset(region_idx); 2391 } 2392 2393 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() { 2394 return _mark_stats_cache.evict_all(); 2395 } 2396 2397 void G1CMTask::print_stats() { 2398 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls); 2399 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2400 _elapsed_time_ms, _termination_time_ms); 2401 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms", 2402 _step_times_ms.num(), 2403 _step_times_ms.avg(), 2404 _step_times_ms.sd(), 2405 _step_times_ms.maximum(), 2406 _step_times_ms.sum()); 2407 size_t const hits = _mark_stats_cache.hits(); 2408 size_t const misses = _mark_stats_cache.misses(); 2409 log_debug(gc, stats)(" Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f", 2410 hits, misses, percent_of(hits, hits + misses)); 2411 } 2412 2413 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2414 return _task_queues->steal(worker_id, hash_seed, task_entry); 2415 } 2416 2417 /***************************************************************************** 2418 2419 The do_marking_step(time_target_ms, ...) method is the building 2420 block of the parallel marking framework. It can be called in parallel 2421 with other invocations of do_marking_step() on different tasks 2422 (but only one per task, obviously) and concurrently with the 2423 mutator threads, or during remark, hence it eliminates the need 2424 for two versions of the code. When called during remark, it will 2425 pick up from where the task left off during the concurrent marking 2426 phase. Interestingly, tasks are also claimable during evacuation 2427 pauses too, since do_marking_step() ensures that it aborts before 2428 it needs to yield. 2429 2430 The data structures that it uses to do marking work are the 2431 following: 2432 2433 (1) Marking Bitmap. If there are gray objects that appear only 2434 on the bitmap (this happens either when dealing with an overflow 2435 or when the initial marking phase has simply marked the roots 2436 and didn't push them on the stack), then tasks claim heap 2437 regions whose bitmap they then scan to find gray objects. A 2438 global finger indicates where the end of the last claimed region 2439 is. A local finger indicates how far into the region a task has 2440 scanned. The two fingers are used to determine how to gray an 2441 object (i.e. whether simply marking it is OK, as it will be 2442 visited by a task in the future, or whether it needs to be also 2443 pushed on a stack). 2444 2445 (2) Local Queue. The local queue of the task which is accessed 2446 reasonably efficiently by the task. Other tasks can steal from 2447 it when they run out of work. Throughout the marking phase, a 2448 task attempts to keep its local queue short but not totally 2449 empty, so that entries are available for stealing by other 2450 tasks. 
Only when there is no more work will a task totally 2451 drain its local queue. 2452 2453 (3) Global Mark Stack. This handles local queue overflow. During 2454 marking only sets of entries are moved between it and the local 2455 queues, as access to it requires a mutex and more fine-grained 2456 interaction with it might cause contention. If it 2457 overflows, then the marking phase should restart and iterate 2458 over the bitmap to identify gray objects. Throughout the marking 2459 phase, tasks attempt to keep the global mark stack at a small 2460 length but not totally empty, so that entries are available for 2461 popping by other tasks. Only when there is no more work will tasks 2462 totally drain the global mark stack. 2463 2464 (4) SATB Buffer Queue. This is where completed SATB buffers are 2465 made available. Buffers are regularly removed from this queue 2466 and scanned for roots, so that the queue doesn't get too 2467 long. During remark, all completed buffers are processed, as 2468 well as the filled-in parts of any uncompleted buffers. 2469 2470 The do_marking_step() method tries to abort when the time target 2471 has been reached. There are a few other cases when the 2472 do_marking_step() method also aborts: 2473 2474 (1) When the marking phase has been aborted (after a Full GC). 2475 2476 (2) When a global overflow (on the global stack) has been 2477 triggered. Before the task aborts, it will actually sync up with 2478 the other tasks to ensure that all the marking data structures 2479 (local queues, stacks, fingers etc.) are re-initialized so that 2480 when do_marking_step() completes, the marking phase can 2481 immediately restart. 2482 2483 (3) When enough completed SATB buffers are available. The 2484 do_marking_step() method only tries to drain SATB buffers right 2485 at the beginning. So, if enough buffers are available, the 2486 marking step aborts and the SATB buffers are processed at 2487 the beginning of the next invocation. 2488 2489 (4) To yield. When we have to yield, we abort and yield 2490 right at the end of do_marking_step(). This saves us from a lot 2491 of hassle as, by yielding, we might allow a Full GC. If this 2492 happens then objects will be compacted underneath our feet, the 2493 heap might shrink, etc. We save checking for this by just 2494 aborting and doing the yield right at the end. 2495 2496 From the above it follows that the do_marking_step() method should 2497 be called in a loop (or, otherwise, regularly) until it completes. 2498 2499 If a marking step completes without its has_aborted() flag being 2500 true, it means it has completed the current marking phase (and 2501 also all other marking tasks have done so and have all synced up). 2502 2503 A method called regular_clock_call() is invoked "regularly" (in 2504 sub-ms intervals) throughout marking. It is this clock method that 2505 checks all the abort conditions which were mentioned above and 2506 decides when the task should abort. A work-based scheme is used to 2507 trigger this clock method: when the number of object words the 2508 marking phase has scanned or the number of references the marking 2509 phase has visited reach a given limit. Additional invocations of 2510 this clock method have been planted in a few other strategic places 2511 too. The initial reason for the clock method was to avoid calling 2512 vtime too regularly, as it is quite expensive.
So, once it was in 2513 place, it was natural to piggy-back all the other conditions on it 2514 too and not constantly check them throughout the code. 2515 2516 If do_termination is true then do_marking_step will enter its 2517 termination protocol. 2518 2519 The value of is_serial must be true when do_marking_step is being 2520 called serially (i.e. by the VMThread) and do_marking_step should 2521 skip any synchronization in the termination and overflow code. 2522 Examples include the serial remark code and the serial reference 2523 processing closures. 2524 2525 The value of is_serial must be false when do_marking_step is 2526 being called by any of the worker threads in a work gang. 2527 Examples include the concurrent marking code (CMMarkingTask), 2528 the MT remark code, and the MT reference processing closures. 2529 2530 *****************************************************************************/ 2531 2532 void G1CMTask::do_marking_step(double time_target_ms, 2533 bool do_termination, 2534 bool is_serial) { 2535 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2536 2537 _start_time_ms = os::elapsedVTime() * 1000.0; 2538 2539 // If do_stealing is true then do_marking_step will attempt to 2540 // steal work from the other G1CMTasks. It only makes sense to 2541 // enable stealing when the termination protocol is enabled 2542 // and do_marking_step() is not being called serially. 2543 bool do_stealing = do_termination && !is_serial; 2544 2545 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2546 _time_target_ms = time_target_ms - diff_prediction_ms; 2547 2548 // set up the variables that are used in the work-based scheme to 2549 // call the regular clock method 2550 _words_scanned = 0; 2551 _refs_reached = 0; 2552 recalculate_limits(); 2553 2554 // clear all flags 2555 clear_has_aborted(); 2556 _has_timed_out = false; 2557 _draining_satb_buffers = false; 2558 2559 ++_calls; 2560 2561 // Set up the bitmap and oop closures. Anything that uses them is 2562 // eventually called from this method, so it is OK to allocate these 2563 // statically. 2564 G1CMBitMapClosure bitmap_closure(this, _cm); 2565 G1CMOopClosure cm_oop_closure(_g1h, this); 2566 set_cm_oop_closure(&cm_oop_closure); 2567 2568 if (_cm->has_overflown()) { 2569 // This can happen if the mark stack overflows during a GC pause 2570 // and this task, after a yield point, restarts. We have to abort 2571 // as we need to get into the overflow protocol which happens 2572 // right at the end of this task. 2573 set_has_aborted(); 2574 } 2575 2576 // First drain any available SATB buffers. After this, we will not 2577 // look at SATB buffers before the next invocation of this method. 2578 // If enough completed SATB buffers are queued up, the regular clock 2579 // will abort this task so that it restarts. 2580 drain_satb_buffers(); 2581 // ...then partially drain the local queue and the global stack 2582 drain_local_queue(true); 2583 drain_global_stack(true); 2584 2585 do { 2586 if (!has_aborted() && _curr_region != NULL) { 2587 // This means that we're already holding on to a region. 2588 assert(_finger != NULL, "if region is not NULL, then the finger " 2589 "should not be NULL either"); 2590 2591 // We might have restarted this task after an evacuation pause 2592 // which might have evacuated the region we're holding on to 2593 // underneath our feet. 
Let's read its limit again to make sure 2594 // that we do not iterate over a region of the heap that 2595 // contains garbage (update_region_limit() will also move 2596 // _finger to the start of the region if it is found empty). 2597 update_region_limit(); 2598 // We will start from _finger not from the start of the region, 2599 // as we might be restarting this task after aborting half-way 2600 // through scanning this region. In this case, _finger points to 2601 // the address where we last found a marked object. If this is a 2602 // fresh region, _finger points to start(). 2603 MemRegion mr = MemRegion(_finger, _region_limit); 2604 2605 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2606 "humongous regions should go around loop once only"); 2607 2608 // Some special cases: 2609 // If the memory region is empty, we can just give up the region. 2610 // If the current region is humongous then we only need to check 2611 // the bitmap for the bit associated with the start of the object, 2612 // scan the object if it's live, and give up the region. 2613 // Otherwise, let's iterate over the bitmap of the part of the region 2614 // that is left. 2615 // If the iteration is successful, give up the region. 2616 if (mr.is_empty()) { 2617 giveup_current_region(); 2618 regular_clock_call(); 2619 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2620 if (_next_mark_bitmap->is_marked(mr.start())) { 2621 // The object is marked - apply the closure 2622 bitmap_closure.do_addr(mr.start()); 2623 } 2624 // Even if this task aborted while scanning the humongous object 2625 // we can (and should) give up the current region. 2626 giveup_current_region(); 2627 regular_clock_call(); 2628 } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) { 2629 giveup_current_region(); 2630 regular_clock_call(); 2631 } else { 2632 assert(has_aborted(), "currently the only way to do so"); 2633 // The only way to abort the bitmap iteration is to return 2634 // false from the do_bit() method. However, inside the 2635 // do_bit() method we move the _finger to point to the 2636 // object currently being looked at. So, if we bail out, we 2637 // have definitely set _finger to something non-null. 2638 assert(_finger != NULL, "invariant"); 2639 2640 // Region iteration was actually aborted. So now _finger 2641 // points to the address of the object we last scanned. If we 2642 // leave it there, when we restart this task, we will rescan 2643 // the object. It is easy to avoid this. We move the finger by 2644 // enough to point to the next possible object header. 2645 assert(_finger < _region_limit, "invariant"); 2646 HeapWord* const new_finger = _finger + ((oop)_finger)->size(); 2647 // Check if bitmap iteration was aborted while scanning the last object 2648 if (new_finger >= _region_limit) { 2649 giveup_current_region(); 2650 } else { 2651 move_finger_to(new_finger); 2652 } 2653 } 2654 } 2655 // At this point we have either completed iterating over the 2656 // region we were holding on to, or we have aborted. 2657 2658 // We then partially drain the local queue and the global stack. 2659 // (Do we really need this?) 2660 drain_local_queue(true); 2661 drain_global_stack(true); 2662 2663 // Read the note on the claim_region() method on why it might 2664 // return NULL with potentially more regions available for 2665 // claiming and why we have to check out_of_regions() to determine 2666 // whether we're done or not. 
2667 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2668 // We are going to try to claim a new region. We should have 2669 // given up on the previous one. 2670 // Separated the asserts so that we know which one fires. 2671 assert(_curr_region == NULL, "invariant"); 2672 assert(_finger == NULL, "invariant"); 2673 assert(_region_limit == NULL, "invariant"); 2674 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2675 if (claimed_region != NULL) { 2676 // Yes, we managed to claim one 2677 setup_for_region(claimed_region); 2678 assert(_curr_region == claimed_region, "invariant"); 2679 } 2680 // It is important to call the regular clock here. It might take 2681 // a while to claim a region if, for example, we hit a large 2682 // block of empty regions. So we need to call the regular clock 2683 // method once round the loop to make sure it's called 2684 // frequently enough. 2685 regular_clock_call(); 2686 } 2687 2688 if (!has_aborted() && _curr_region == NULL) { 2689 assert(_cm->out_of_regions(), 2690 "at this point we should be out of regions"); 2691 } 2692 } while ( _curr_region != NULL && !has_aborted()); 2693 2694 if (!has_aborted()) { 2695 // We cannot check whether the global stack is empty, since other 2696 // tasks might be pushing objects to it concurrently. 2697 assert(_cm->out_of_regions(), 2698 "at this point we should be out of regions"); 2699 // Try to reduce the number of available SATB buffers so that 2700 // remark has less work to do. 2701 drain_satb_buffers(); 2702 } 2703 2704 // Since we've done everything else, we can now totally drain the 2705 // local queue and global stack. 2706 drain_local_queue(false); 2707 drain_global_stack(false); 2708 2709 // Attempt at work stealing from other task's queues. 2710 if (do_stealing && !has_aborted()) { 2711 // We have not aborted. This means that we have finished all that 2712 // we could. Let's try to do some stealing... 2713 2714 // We cannot check whether the global stack is empty, since other 2715 // tasks might be pushing objects to it concurrently. 2716 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2717 "only way to reach here"); 2718 while (!has_aborted()) { 2719 G1TaskQueueEntry entry; 2720 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2721 scan_task_entry(entry); 2722 2723 // And since we're towards the end, let's totally drain the 2724 // local queue and global stack. 2725 drain_local_queue(false); 2726 drain_global_stack(false); 2727 } else { 2728 break; 2729 } 2730 } 2731 } 2732 2733 // We still haven't aborted. Now, let's try to get into the 2734 // termination protocol. 2735 if (do_termination && !has_aborted()) { 2736 // We cannot check whether the global stack is empty, since other 2737 // tasks might be concurrently pushing objects on it. 2738 // Separated the asserts so that we know which one fires. 2739 assert(_cm->out_of_regions(), "only way to reach here"); 2740 assert(_task_queue->size() == 0, "only way to reach here"); 2741 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2742 2743 // The G1CMTask class also extends the TerminatorTerminator class, 2744 // hence its should_exit_termination() method will also decide 2745 // whether to exit the termination protocol or not. 
2746 bool finished = (is_serial || 2747 _cm->terminator()->offer_termination(this)); 2748 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2749 _termination_time_ms += 2750 termination_end_time_ms - _termination_start_time_ms; 2751 2752 if (finished) { 2753 // We're all done. 2754 2755 // We can now guarantee that the global stack is empty, since 2756 // all other tasks have finished. We separated the guarantees so 2757 // that, if a condition is false, we can immediately find out 2758 // which one. 2759 guarantee(_cm->out_of_regions(), "only way to reach here"); 2760 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2761 guarantee(_task_queue->size() == 0, "only way to reach here"); 2762 guarantee(!_cm->has_overflown(), "only way to reach here"); 2763 } else { 2764 // Apparently there's more work to do. Let's abort this task. The 2765 // caller will restart it and we can hopefully find more things to do. 2766 set_has_aborted(); 2767 } 2768 } 2769 2770 // Mainly for debugging purposes to make sure that a pointer to the 2771 // closure which was statically allocated in this frame doesn't 2772 // escape it by accident. 2773 set_cm_oop_closure(NULL); 2774 double end_time_ms = os::elapsedVTime() * 1000.0; 2775 double elapsed_time_ms = end_time_ms - _start_time_ms; 2776 // Update the step history. 2777 _step_times_ms.add(elapsed_time_ms); 2778 2779 if (has_aborted()) { 2780 // The task was aborted for some reason. 2781 if (_has_timed_out) { 2782 double diff_ms = elapsed_time_ms - _time_target_ms; 2783 // Keep statistics of how well we did with respect to hitting 2784 // our target only if we actually timed out (if we aborted for 2785 // other reasons, then the results might get skewed). 2786 _marking_step_diffs_ms.add(diff_ms); 2787 } 2788 2789 if (_cm->has_overflown()) { 2790 // This is the interesting one. We aborted because a global 2791 // overflow was raised. This means we have to restart the 2792 // marking phase and start iterating over regions. However, in 2793 // order to do this we have to make sure that all tasks stop 2794 // what they are doing and re-initialize in a safe manner. We 2795 // will achieve this with the use of two barrier sync points. 2796 2797 if (!is_serial) { 2798 // We only need to enter the sync barrier if being called 2799 // from a parallel context 2800 _cm->enter_first_sync_barrier(_worker_id); 2801 2802 // When we exit this sync barrier we know that all tasks have 2803 // stopped doing marking work. So, it's now safe to 2804 // re-initialize our data structures. 2805 } 2806 2807 clear_region_fields(); 2808 flush_mark_stats_cache(); 2809 2810 if (!is_serial) { 2811 // If we're executing the concurrent phase of marking, reset the marking 2812 // state; otherwise the marking state is reset after reference processing, 2813 // during the remark pause. 2814 // If we reset here as a result of an overflow during the remark we will 2815 // see assertion failures from any subsequent set_concurrency_and_phase() 2816 // calls. 2817 if (_cm->concurrent() && _worker_id == 0) { 2818 // Worker 0 is responsible for clearing the global data structures because 2819 // of an overflow. During STW we should not clear the overflow flag (in 2820 // G1ConcurrentMark::reset_marking_for_restart()) since we rely on it being true when we exit 2821 // this method to abort the pause and restart concurrent marking. 2822 _cm->reset_marking_for_restart(); 2823 2824 log_info(gc, marking)("Concurrent Mark reset for overflow"); 2825 } 2826 2827 // ...and enter the second barrier.
2828 _cm->enter_second_sync_barrier(_worker_id); 2829 } 2830 // At this point, if we're during the concurrent phase of 2831 // marking, everything has been re-initialized and we're 2832 // ready to restart. 2833 } 2834 } 2835 } 2836 2837 G1CMTask::G1CMTask(uint worker_id, 2838 G1ConcurrentMark* cm, 2839 G1CMTaskQueue* task_queue, 2840 G1RegionMarkStats* mark_stats, 2841 uint max_regions) : 2842 _objArray_processor(this), 2843 _worker_id(worker_id), 2844 _g1h(G1CollectedHeap::heap()), 2845 _cm(cm), 2846 _next_mark_bitmap(NULL), 2847 _task_queue(task_queue), 2848 _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize), 2849 _calls(0), 2850 _time_target_ms(0.0), 2851 _start_time_ms(0.0), 2852 _cm_oop_closure(NULL), 2853 _curr_region(NULL), 2854 _finger(NULL), 2855 _region_limit(NULL), 2856 _words_scanned(0), 2857 _words_scanned_limit(0), 2858 _real_words_scanned_limit(0), 2859 _refs_reached(0), 2860 _refs_reached_limit(0), 2861 _real_refs_reached_limit(0), 2862 _hash_seed(17), 2863 _has_aborted(false), 2864 _has_timed_out(false), 2865 _draining_satb_buffers(false), 2866 _step_times_ms(), 2867 _elapsed_time_ms(0.0), 2868 _termination_time_ms(0.0), 2869 _termination_start_time_ms(0.0), 2870 _marking_step_diffs_ms() 2871 { 2872 guarantee(task_queue != NULL, "invariant"); 2873 2874 _marking_step_diffs_ms.add(0.5); 2875 } 2876 2877 // These are formatting macros that are used below to ensure 2878 // consistent formatting. The *_H_* versions are used to format the 2879 // header for a particular value and they should be kept consistent 2880 // with the corresponding macro. Also note that most of the macros add 2881 // the necessary white space (as a prefix) which makes them a bit 2882 // easier to compose. 2883 2884 // All the output lines are prefixed with this string to be able to 2885 // identify them easily in a large log file. 2886 #define G1PPRL_LINE_PREFIX "###" 2887 2888 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 2889 #ifdef _LP64 2890 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 2891 #else // _LP64 2892 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 2893 #endif // _LP64 2894 2895 // For per-region info 2896 #define G1PPRL_TYPE_FORMAT " %-4s" 2897 #define G1PPRL_TYPE_H_FORMAT " %4s" 2898 #define G1PPRL_STATE_FORMAT " %-5s" 2899 #define G1PPRL_STATE_H_FORMAT " %5s" 2900 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 2901 #define G1PPRL_BYTE_H_FORMAT " %9s" 2902 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 2903 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 2904 2905 // For summary info 2906 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 2907 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 2908 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 2909 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 2910 2911 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) : 2912 _total_used_bytes(0), _total_capacity_bytes(0), 2913 _total_prev_live_bytes(0), _total_next_live_bytes(0), 2914 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) 2915 { 2916 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2917 MemRegion g1_reserved = g1h->g1_reserved(); 2918 double now = os::elapsedTime(); 2919 2920 // Print the header of the output. 
2921 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 2922 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 2923 G1PPRL_SUM_ADDR_FORMAT("reserved") 2924 G1PPRL_SUM_BYTE_FORMAT("region-size"), 2925 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 2926 HeapRegion::GrainBytes); 2927 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2928 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2929 G1PPRL_TYPE_H_FORMAT 2930 G1PPRL_ADDR_BASE_H_FORMAT 2931 G1PPRL_BYTE_H_FORMAT 2932 G1PPRL_BYTE_H_FORMAT 2933 G1PPRL_BYTE_H_FORMAT 2934 G1PPRL_DOUBLE_H_FORMAT 2935 G1PPRL_BYTE_H_FORMAT 2936 G1PPRL_STATE_H_FORMAT 2937 G1PPRL_BYTE_H_FORMAT, 2938 "type", "address-range", 2939 "used", "prev-live", "next-live", "gc-eff", 2940 "remset", "state", "code-roots"); 2941 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2942 G1PPRL_TYPE_H_FORMAT 2943 G1PPRL_ADDR_BASE_H_FORMAT 2944 G1PPRL_BYTE_H_FORMAT 2945 G1PPRL_BYTE_H_FORMAT 2946 G1PPRL_BYTE_H_FORMAT 2947 G1PPRL_DOUBLE_H_FORMAT 2948 G1PPRL_BYTE_H_FORMAT 2949 G1PPRL_STATE_H_FORMAT 2950 G1PPRL_BYTE_H_FORMAT, 2951 "", "", 2952 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 2953 "(bytes)", "", "(bytes)"); 2954 } 2955 2956 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) { 2957 const char* type = r->get_type_str(); 2958 HeapWord* bottom = r->bottom(); 2959 HeapWord* end = r->end(); 2960 size_t capacity_bytes = r->capacity(); 2961 size_t used_bytes = r->used(); 2962 size_t prev_live_bytes = r->live_bytes(); 2963 size_t next_live_bytes = r->next_live_bytes(); 2964 double gc_eff = r->gc_efficiency(); 2965 size_t remset_bytes = r->rem_set()->mem_size(); 2966 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 2967 const char* remset_type = r->rem_set()->get_short_state_str(); 2968 2969 _total_used_bytes += used_bytes; 2970 _total_capacity_bytes += capacity_bytes; 2971 _total_prev_live_bytes += prev_live_bytes; 2972 _total_next_live_bytes += next_live_bytes; 2973 _total_remset_bytes += remset_bytes; 2974 _total_strong_code_roots_bytes += strong_code_roots_bytes; 2975 2976 // Print a line for this particular region. 2977 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2978 G1PPRL_TYPE_FORMAT 2979 G1PPRL_ADDR_BASE_FORMAT 2980 G1PPRL_BYTE_FORMAT 2981 G1PPRL_BYTE_FORMAT 2982 G1PPRL_BYTE_FORMAT 2983 G1PPRL_DOUBLE_FORMAT 2984 G1PPRL_BYTE_FORMAT 2985 G1PPRL_STATE_FORMAT 2986 G1PPRL_BYTE_FORMAT, 2987 type, p2i(bottom), p2i(end), 2988 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 2989 remset_bytes, remset_type, strong_code_roots_bytes); 2990 2991 return false; 2992 } 2993 2994 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 2995 // add static memory usages to remembered set sizes 2996 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 2997 // Print the footer of the output. 
2998 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2999 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3000 " SUMMARY" 3001 G1PPRL_SUM_MB_FORMAT("capacity") 3002 G1PPRL_SUM_MB_PERC_FORMAT("used") 3003 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3004 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3005 G1PPRL_SUM_MB_FORMAT("remset") 3006 G1PPRL_SUM_MB_FORMAT("code-roots"), 3007 bytes_to_mb(_total_capacity_bytes), 3008 bytes_to_mb(_total_used_bytes), 3009 percent_of(_total_used_bytes, _total_capacity_bytes), 3010 bytes_to_mb(_total_prev_live_bytes), 3011 percent_of(_total_prev_live_bytes, _total_capacity_bytes), 3012 bytes_to_mb(_total_next_live_bytes), 3013 percent_of(_total_next_live_bytes, _total_capacity_bytes), 3014 bytes_to_mb(_total_remset_bytes), 3015 bytes_to_mb(_total_strong_code_roots_bytes)); 3016 }