/*
 * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/shared/adaptiveSizePolicy.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "gc/shared/weakProcessor.hpp"
#include "include/jvm.h"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/access.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
#include "utilities/growableArray.hpp"

bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();

  return true;
}

size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}

bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}

void G1CMMarkStack::expand() {
  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                    old_capacity, new_capacity);
  }
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }
}

void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
  elem->next = *list;
  *list = elem;
}

void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
  TaskQueueEntryChunk* result = *list;
  if (result != NULL) {
    *list = (*list)->next;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != NULL) {
    _chunks_in_chunk_list--;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
  // wraparound of _hwm.
  if (_hwm >= _chunk_capacity) {
    return NULL;
  }

  size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
  if (cur_idx >= _chunk_capacity) {
    return NULL;
  }

  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
  result->next = NULL;
  return result;
}

bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == NULL) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}

G1CMRootRegions::G1CMRootRegions() :
  _survivors(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}

void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) {
    return false;
  }

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

// Returns the maximum number of workers to be used in a concurrent
// phase based on the number of GC workers being used in a STW
// phase.
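// For example (illustrative arithmetic only): with the scaling below,
// ParallelGCThreads = 8 yields (8 + 2) / 4 = 2 concurrent workers, and very
// small values are clamped to a minimum of 1.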
static uint scale_concurrent_worker_threads(uint num_gc_workers) {
  return MAX2((num_gc_workers + 2) / 4, 1U);
}

G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* prev_bitmap_storage,
                                   G1RegionToSpaceMapper* next_bitmap_storage) :
  // _cm_thread set inside the constructor
  _g1h(g1h),
  _completed_initialization(false),

  _mark_bitmap_1(),
  _mark_bitmap_2(),
  _prev_mark_bitmap(&_mark_bitmap_1),
  _next_mark_bitmap(&_mark_bitmap_2),

  _heap(_g1h->reserved_region()),

  _root_regions(),

  _global_mark_stack(),

  // _finger set in set_non_marking_state

  _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
  _max_num_tasks(ParallelGCThreads),
  // _num_active_tasks set in set_non_marking_state()
  // _tasks set inside the constructor

  _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
  _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),
  _total_cleanup_time(0.0),

  _accum_task_vtime(NULL),

  _concurrent_workers(NULL),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
{
  _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start ConcurrentMark thread.
  _cm_thread = new ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "CGC_lock must be initialized");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
    // Calculate the number of concurrent worker threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
  }

  assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _num_concurrent_workers = ConcGCThreads;
  _max_concurrent_workers = _num_concurrent_workers;

  _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
  _concurrent_workers->initialize_workers();

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());

    _accum_task_vtime[i] = 0.0;
  }

  reset_at_marking_complete();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  _has_aborted = false;

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different numbers of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(_next_mark_bitmap);
  }

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < max_regions; i++) {
    _top_at_rebuild_starts[i] = NULL;
    _region_mark_stats[i].clear();
  }
}

void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
  for (uint j = 0; j < _max_num_tasks; ++j) {
    _tasks[j]->clear_mark_stats_cache(region_idx);
  }
  _top_at_rebuild_starts[region_idx] = NULL;
  _region_mark_stats[region_idx].clear();
}

void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
  uint const region_idx = r->hrm_index();
  if (r->is_humongous()) {
    assert(r->is_starts_humongous(), "Got humongous continues region here");
    uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
    for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
      clear_statistics_in_region(j);
    }
  } else {
    clear_statistics_in_region(region_idx);
  }
}

static void clear_mark_if_set(G1CMBitMap* bitmap, HeapWord* addr) {
  if (bitmap->is_marked(addr)) {
    bitmap->clear(addr);
  }
}

void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
  assert_at_safepoint_on_vm_thread();

  // Need to clear all mark bits of the humongous object.
  clear_mark_if_set(_prev_mark_bitmap, r->bottom());
  clear_mark_if_set(_next_mark_bitmap, r->bottom());

  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  clear_statistics(r);
}

void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    uint max_regions = _g1h->max_regions();
    for (uint i = 0; i < max_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  _finger = _heap.start();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap.end()));
  }
}

void G1ConcurrentMark::reset_at_marking_complete() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}

G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) {
    }

    virtual bool do_heap_region(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }

  bool is_complete() {
    return _cl.is_complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cm_thread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");

  clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);

  // Repeat the asserts from above.
  guarantee(cm_thread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert_at_safepoint_on_vm_thread();
  clear_bitmap(_prev_mark_bitmap, workers, false);
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool do_heap_region(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
  CheckBitmapClearHRClosure cl(_next_mark_bitmap);
  _g1h->heap_region_iterate(&cl);
  return cl.is_complete();
}

class NoteStartOfMarkHRClosure : public HeapRegionClosure {
public:
  bool do_heap_region(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::pre_initial_mark() {
  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
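  // (note_start_of_marking() records each region's current top as its next
  // top-at-mark-start (NTAMS); objects allocated above NTAMS during this cycle
  // are treated as implicitly live by the marking code.)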
  NoteStartOfMarkHRClosure startcl;
  _g1h->heap_region_iterate(&startcl);
}


void G1ConcurrentMark::post_initial_mark() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, and we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended by a Full GC or for an evacuation
 * pause to occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class G1CMConcurrentMarkingTask : public AbstractGangTask {
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");

      G1CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          task->do_marking_step(G1ConcMarkStepDurationMillis,
                                true  /* do_termination */,
                                false /* is_serial */);

          _cm->do_yield_check();
        } while (!_cm->has_aborted() && task->has_aborted());
      }
      task->record_end_time();
      guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
    AbstractGangTask("Concurrent Mark"), _cm(cm) { }

  ~G1CMConcurrentMarkingTask() { }
};

uint G1ConcurrentMark::calc_active_marking_workers() {
  uint result = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    result = _max_concurrent_workers;
  } else {
    result =
      AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
                                                      1, /* Minimum workers */
                                                      _num_concurrent_workers,
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale the result down by scale_concurrent_workers() because
    // that scaling has already gone into "_max_concurrent_workers".
  }
  assert(result > 0 && result <= _max_concurrent_workers,
         "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
         _max_concurrent_workers, result);
  return result;
}

void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
  G1ConcurrentMark* _cm;
public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scan_root_region(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _num_concurrent_workers = MIN2(calc_active_marking_workers(),
                                   // We distribute work on a per-region basis, so starting
                                   // more threads than that is useless.
                                   root_regions()->num_root_regions());
    assert(_num_concurrent_workers <= _max_concurrent_workers,
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
    _concurrent_workers->run_task(&task, _num_concurrent_workers);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->collector_state()->set_clearing_next_bitmap(false);

  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow = false;

  _num_concurrent_workers = calc_active_marking_workers();

  uint active_workers = MAX2(1U, _num_concurrent_workers);

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}

void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc, phases) trace(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(type, vo, buffer);
  }

  verifier->check_bitmaps(caller);
}

class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;

  G1PrintRegionLivenessInfoClosure _cl;

  uint _num_regions_selected_for_rebuild; // The number of regions actually selected for rebuild.

  void update_remset_before_rebuild(HeapRegion * hr) {
    G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker();

    size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize;
    bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
    if (selected_for_rebuild) {
      _num_regions_selected_for_rebuild++;
    }
    _cm->update_top_at_rebuild_start(hr);
  }

  void distribute_marked_bytes(HeapRegion* hr, size_t marked_words) {
    uint const region_idx = hr->hrm_index();
    assert(hr->is_starts_humongous(), "Should not have marked bytes " SIZE_FORMAT " in non-starts humongous region %u (%s)", marked_words, region_idx, hr->get_type_str());
    uint num_regions_in_humongous = (uint)G1CollectedHeap::humongous_obj_size_in_regions(marked_words);

    for (uint i = region_idx; i < (region_idx + num_regions_in_humongous); i++) {
      HeapRegion* const r = _g1h->region_at(i);
      size_t const words_to_add = MIN2(HeapRegion::GrainWords, marked_words);
      r->add_to_marked_bytes(words_to_add * HeapWordSize);
      marked_words -= words_to_add;
    }
  }

  void update_marked_bytes(HeapRegion* hr) {
    uint const region_idx = hr->hrm_index();
    size_t marked_words = _cm->liveness(region_idx);
    // The marking attributes the object's size completely to the humongous starts
    // region. We need to distribute this value across the entire set of regions a
    // humongous object spans.
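    // For example (illustrative numbers): a live humongous object spanning
    // 2.5 regions has all of its words attributed to the starts region by
    // marking; distribute_marked_bytes() then credits GrainWords to each of
    // the first two regions and the remaining half region's worth to the third.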
    if (!hr->is_humongous()) {
      hr->add_to_marked_bytes(marked_words * HeapWordSize);
      log_trace(gc)("Added " SIZE_FORMAT " words to region %u (%s)", marked_words, region_idx, hr->get_type_str());
    } else {
      if (marked_words > 0) {
        log_trace(gc)("Distributing " SIZE_FORMAT " words to humongous start region %u (%s), word size %d (%f)",
                      marked_words, region_idx, hr->get_type_str(),
                      oop(hr->bottom())->size(), (double)oop(hr->bottom())->size() / HeapRegion::GrainWords);
        distribute_marked_bytes(hr, marked_words);
      } else {
        log_trace(gc)("NOT Added " SIZE_FORMAT " words to region %u (%s)", marked_words, region_idx, hr->get_type_str());
      }
    }
  }
public:
  G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm) :
    _g1h(g1h), _cm(cm), _cl("Post-Marking"), _num_regions_selected_for_rebuild(0) { }

  virtual bool do_heap_region(HeapRegion* r) {
    update_remset_before_rebuild(r);
    update_marked_bytes(r);
    if (log_is_enabled(Trace, gc, liveness)) {
      _cl.do_heap_region(r);
    }
    r->note_end_of_marking();
    return false;
  }

  uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
};

class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
  G1CollectedHeap* _g1h;
public:
  G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  virtual bool do_heap_region(HeapRegion* r) {
    _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r);
    return false;
  }
};

void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* g1p = _g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");

  {
    GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work(false /* clear_all_soft_refs */);

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, and we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases)("Flush Task Caches");
      flush_all_task_caches();
    }

    // Install newly created mark bitmap as "prev".
    swap_mark_bitmaps();
    {
      GCTraceTime(Debug, gc, phases)("Update Remembered Set Tracking Before Rebuild");
      G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this);
      _g1h->heap_region_iterate(&cl);
      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_regions(), cl.num_selected_for_rebuild());
    }

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark after");

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    reset_at_marking_complete();
  } else {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  {
    GCTraceTime(Debug, gc, phases)("Report Object Count");
    report_object_count(mark_finished);
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

class G1CleanupTask : public AbstractGangTask {
  // Per-region work during the Cleanup pause.
  class G1CleanupRegionsClosure : public HeapRegionClosure {
    G1CollectedHeap* _g1h;
    size_t _freed_bytes;
    FreeRegionList* _local_cleanup_list;
    uint _old_regions_removed;
    uint _humongous_regions_removed;
    HRRSCleanupTask* _hrrs_cleanup_task;

  public:
    G1CleanupRegionsClosure(G1CollectedHeap* g1,
                            FreeRegionList* local_cleanup_list,
                            HRRSCleanupTask* hrrs_cleanup_task) :
      _g1h(g1),
      _freed_bytes(0),
      _local_cleanup_list(local_cleanup_list),
      _old_regions_removed(0),
      _humongous_regions_removed(0),
      _hrrs_cleanup_task(hrrs_cleanup_task) { }

    size_t freed_bytes() { return _freed_bytes; }
    const uint old_regions_removed() { return _old_regions_removed; }
    const uint humongous_regions_removed() { return _humongous_regions_removed; }

    bool do_heap_region(HeapRegion *hr) {
      if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
        _freed_bytes += hr->used();
        hr->set_containing_set(NULL);
        if (hr->is_humongous()) {
          _humongous_regions_removed++;
          _g1h->free_humongous_region(hr, _local_cleanup_list);
        } else {
          _old_regions_removed++;
          _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */);
        }
        hr->clear_cardtable();
        _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
        log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
      } else {
        hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
      }

      return false;
    }
  };

  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1CleanupTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
    AbstractGangTask("G1 Cleanup"),
    _g1h(g1h),
    _cleanup_list(cleanup_list),
    _hrclaimer(n_workers) {

    HeapRegionRemSet::reset_for_cleanup_tasks();
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1CleanupRegionsClosure cl(_g1h,
                               &local_cleanup_list,
                               &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
    assert(cl.is_complete(), "Shouldn't have aborted!");

    // Now update the old/humongous region sets
    _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(cl.freed_bytes());

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

void G1ConcurrentMark::reclaim_empty_regions() {
  WorkGang* workers = _g1h->workers();
  FreeRegionList empty_regions_list("Empty Regions After Mark List");

  G1CleanupTask cl(_g1h, &empty_regions_list, workers->active_workers());
  workers->run_task(&cl);

  if (!empty_regions_list.is_empty()) {
    log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
    // Now print the empty regions list.
    G1HRPrinter* hrp = _g1h->hr_printer();
    if (hrp->is_active()) {
      FreeRegionListIterator iter(&empty_regions_list);
      while (iter.more_available()) {
        HeapRegion* hr = iter.get_next();
        hrp->cleanup(hr);
      }
    }
    // And actually make them available.
    _g1h->prepend_to_freelist(&empty_regions_list);
  }
}

void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* g1p = _g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup before");

  {
    GCTraceTime(Debug, gc, phases)("Update Remembered Set Tracking After Rebuild");
    G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  }

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
    _g1h->heap_region_iterate(&cl);
  }

  {
    GCTraceTime(Debug, gc, phases)("Reclaim Empty Regions");
    reclaim_empty_regions();
  }

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    GCTraceTime(Debug, gc, phases)("Purge Metaspace");
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->g1mm()->update_sizes();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for Cleanup to finish.
  _g1h->increment_total_collections();

  // Local statistics
  double recent_cleanup_time = (os::elapsedTime() - start);
  _total_cleanup_time += recent_cleanup_time;
  _cleanup_times.add(recent_cleanup_time);

  {
    GCTraceTime(Debug, gc, phases)("Finalize Concurrent Mark Cleanup");
    _g1h->g1_policy()->record_concurrent_mark_cleanup_end();
  }
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a worker thread (for serial reference
// processing the G1CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  int               _ref_counter_limit;
  int               _ref_counter;
  bool              _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      _task->deal_with_reference(p);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  bool              _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true         /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap*  _g1h;
  G1ConcurrentMark* _cm;
  WorkGang*         _workers;
  uint              _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          G1ConcurrentMark* cm,
                          WorkGang* workers,
                          uint n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy : public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&      _proc_task;
  G1CollectedHeap*  _g1h;
  G1ConcurrentMark* _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       G1ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    ResourceMark rm;
    HandleMark hm;
    G1CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() know
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&proc_task_proxy);
}

class G1CMRefEnqueueTaskProxy : public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() know
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&enq_task_proxy);
}

void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark hm;

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(_g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.
1563 1564 // Set the soft reference policy 1565 rp->setup_policy(clear_all_soft_refs); 1566 assert(_global_mark_stack.is_empty(), "mark stack should be empty"); 1567 1568 // Instances of the 'Keep Alive' and 'Complete GC' closures used 1569 // in serial reference processing. Note these closures are also 1570 // used for serially processing (by the current thread) the 1571 // JNI references during parallel reference processing. 1572 // 1573 // These closures do not need to synchronize with the worker 1574 // threads involved in parallel reference processing as these 1575 // instances are executed serially by the current thread (e.g. 1576 // reference processing is not multi-threaded and is thus 1577 // performed by the current thread instead of a gang worker). 1578 // 1579 // The gang tasks involved in parallel reference processing create 1580 // their own instances of these closures, which do their own 1581 // synchronization among themselves. 1582 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 1583 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 1584 1585 // We need at least one active thread. If reference processing 1586 // is not multi-threaded we use the current (VMThread) thread, 1587 // otherwise we use the work gang from the G1CollectedHeap and 1588 // we utilize all the worker threads we can. 1589 bool processing_is_mt = rp->processing_is_mt(); 1590 uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U); 1591 active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U); 1592 1593 // Parallel processing task executor. 1594 G1CMRefProcTaskExecutor par_task_executor(_g1h, this, 1595 _g1h->workers(), active_workers); 1596 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 1597 1598 // Set the concurrency level. The phase was already set prior to 1599 // executing the remark task. 1600 set_concurrency(active_workers); 1601 1602 // Set the degree of MT processing here. If the discovery was done MT, 1603 // the number of threads involved during discovery could differ from 1604 // the number of active workers. This is OK as long as the discovered 1605 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 1606 rp->set_active_mt_degree(active_workers); 1607 1608 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q()); 1609 1610 // Process the weak references. 1611 const ReferenceProcessorStats& stats = 1612 rp->process_discovered_references(&g1_is_alive, 1613 &g1_keep_alive, 1614 &g1_drain_mark_stack, 1615 executor, 1616 &pt); 1617 _gc_tracer_cm->report_gc_reference_stats(stats); 1618 pt.print_all_references(); 1619 1620 // The do_oop work routines of the keep_alive and drain_marking_stack 1621 // oop closures will set the has_overflown flag if we overflow the 1622 // global marking stack.
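    // If that happens, the assertions below tolerate a non-empty mark stack,
    // and weak_refs_work() returns early further down (after weak processing)
    // so that concurrent marking can be restarted, since g1_is_alive cannot
    // be trusted once the mark stack has overflown.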
1623 1624 assert(has_overflown() || _global_mark_stack.is_empty(), 1625 "Mark stack should be empty (unless it has overflown)"); 1626 1627 assert(rp->num_q() == active_workers, "why not"); 1628 1629 rp->enqueue_discovered_references(executor, &pt); 1630 1631 rp->verify_no_references_recorded(); 1632 1633 pt.print_enqueue_phase(); 1634 1635 assert(!rp->discovery_enabled(), "Post condition"); 1636 } 1637 1638 assert(has_overflown() || _global_mark_stack.is_empty(), 1639 "Mark stack should be empty (unless it has overflown)"); 1640 1641 { 1642 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm); 1643 WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl); 1644 } 1645 1646 if (has_overflown()) { 1647 // We can not trust g1_is_alive if the marking stack overflowed 1648 return; 1649 } 1650 1651 assert(_global_mark_stack.is_empty(), "Marking should have completed"); 1652 1653 // Unload Klasses, String, Symbols, Code Cache, etc. 1654 if (ClassUnloadingWithConcurrentMark) { 1655 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm); 1656 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */); 1657 _g1h->complete_cleaning(&g1_is_alive, purged_classes); 1658 } else { 1659 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm); 1660 // No need to clean string table and symbol table as they are treated as strong roots when 1661 // class unloading is disabled. 1662 _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled()); 1663 } 1664 } 1665 1666 // When sampling object counts, we already swapped the mark bitmaps, so we need to use 1667 // the prev bitmap determining liveness. 1668 class G1ObjectCountIsAliveClosure: public BoolObjectClosure { 1669 G1CollectedHeap* _g1; 1670 public: 1671 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) { } 1672 1673 bool do_object_b(oop obj) { 1674 HeapWord* addr = (HeapWord*)obj; 1675 return addr != NULL && 1676 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_dead(obj)); 1677 } 1678 }; 1679 1680 void G1ConcurrentMark::report_object_count(bool mark_completed) { 1681 // Depending on the completion of the marking liveness needs to be determined 1682 // using either the next or prev bitmap. 1683 G1ObjectCountIsAliveClosure is_alive_prev(_g1h); 1684 G1CMIsAliveClosure is_alive_next(_g1h); 1685 BoolObjectClosure* is_alive; 1686 if (mark_completed) { 1687 is_alive = &is_alive_prev; 1688 } else { 1689 is_alive = &is_alive_next; 1690 } 1691 _gc_tracer_cm->report_object_count_after_gc(is_alive); 1692 } 1693 1694 1695 void G1ConcurrentMark::swap_mark_bitmaps() { 1696 G1CMBitMap* temp = _prev_mark_bitmap; 1697 _prev_mark_bitmap = _next_mark_bitmap; 1698 _next_mark_bitmap = temp; 1699 _g1h->collector_state()->set_clearing_next_bitmap(true); 1700 } 1701 1702 // Closure for marking entries in SATB buffers. 1703 class G1CMSATBBufferClosure : public SATBBufferClosure { 1704 private: 1705 G1CMTask* _task; 1706 G1CollectedHeap* _g1h; 1707 1708 // This is very similar to G1CMTask::deal_with_reference, but with 1709 // more relaxed requirements for the argument, so this must be more 1710 // circumspect about treating the argument as an object. 
1711 void do_entry(void* entry) const { 1712 _task->increment_refs_reached(); 1713 oop const obj = static_cast<oop>(entry); 1714 _task->make_reference_grey(obj); 1715 } 1716 1717 public: 1718 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h) 1719 : _task(task), _g1h(g1h) { } 1720 1721 virtual void do_buffer(void** buffer, size_t size) { 1722 for (size_t i = 0; i < size; ++i) { 1723 do_entry(buffer[i]); 1724 } 1725 } 1726 }; 1727 1728 class G1RemarkThreadsClosure : public ThreadClosure { 1729 G1CMSATBBufferClosure _cm_satb_cl; 1730 G1CMOopClosure _cm_cl; 1731 MarkingCodeBlobClosure _code_cl; 1732 int _thread_parity; 1733 1734 public: 1735 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) : 1736 _cm_satb_cl(task, g1h), 1737 _cm_cl(g1h, task), 1738 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 1739 _thread_parity(Threads::thread_claim_parity()) {} 1740 1741 void do_thread(Thread* thread) { 1742 if (thread->is_Java_thread()) { 1743 if (thread->claim_oops_do(true, _thread_parity)) { 1744 JavaThread* jt = (JavaThread*)thread; 1745 1746 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking; 1747 // however, the oops reachable from nmethods have very complex lifecycles: 1748 // * Alive if on the stack of an executing method 1749 // * Weakly reachable otherwise 1750 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 1751 // live by the SATB invariant but other oops recorded in nmethods may behave differently. 1752 jt->nmethods_do(&_code_cl); 1753 1754 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 1755 } 1756 } else if (thread->is_VM_thread()) { 1757 if (thread->claim_oops_do(true, _thread_parity)) { 1758 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 1759 } 1760 } 1761 } 1762 }; 1763 1764 class G1CMRemarkTask : public AbstractGangTask { 1765 G1ConcurrentMark* _cm; 1766 public: 1767 void work(uint worker_id) { 1768 G1CMTask* task = _cm->task(worker_id); 1769 task->record_start_time(); 1770 { 1771 ResourceMark rm; 1772 HandleMark hm; 1773 1774 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 1775 Threads::threads_do(&threads_f); 1776 } 1777 1778 do { 1779 task->do_marking_step(1000000000.0 /* something very large */, 1780 true /* do_termination */, 1781 false /* is_serial */); 1782 } while (task->has_aborted() && !_cm->has_overflown()); 1783 // If we overflow, then we do not want to restart. We instead 1784 // want to abort remark and do concurrent marking again. 1785 task->record_end_time(); 1786 } 1787 1788 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) : 1789 AbstractGangTask("Par Remark"), _cm(cm) { 1790 _cm->terminator()->reset_for_reuse(active_workers); 1791 } 1792 }; 1793 1794 void G1ConcurrentMark::finalize_marking() { 1795 ResourceMark rm; 1796 HandleMark hm; 1797 1798 _g1h->ensure_parsability(false); 1799 1800 // This is remark, so we'll use up all active threads. 1801 uint active_workers = _g1h->workers()->active_workers(); 1802 set_concurrency_and_phase(active_workers, false /* concurrent */); 1803 // Leave _parallel_marking_threads at its 1804 // value originally calculated in the G1ConcurrentMark 1805 // constructor and pass values of the active workers 1806 // through the gang in the task.
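  // The G1RemarkThreadsClosure applied by the remark task below relies on
  // Threads::thread_claim_parity() and Thread::claim_oops_do() so that each
  // thread is processed by exactly one remark worker, no matter how many
  // workers run the closure. A minimal, self-contained sketch of that kind of
  // claim token, using std::atomic and illustrative names only (this is not
  // the HotSpot implementation):
  //
  //   #include <atomic>
  //
  //   struct ClaimableThread {
  //     std::atomic<int> claimed_parity{0};
  //     // Returns true for exactly one caller per value of global_parity.
  //     bool try_claim(int global_parity) {
  //       int expected = claimed_parity.load();
  //       if (expected == global_parity) {
  //         return false;                 // already claimed in this round
  //       }
  //       return claimed_parity.compare_exchange_strong(expected, global_parity);
  //     }
  //   };
  //
  // The global parity is flipped once per claiming phase, so the per-thread
  // token never needs to be reset explicitly.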
1807 1808 { 1809 StrongRootsScope srs(active_workers); 1810 1811 G1CMRemarkTask remarkTask(this, active_workers); 1812 // We will start all available threads, even if we decide that the 1813 // active_workers will be fewer. The extra ones will just bail out 1814 // immediately. 1815 _g1h->workers()->run_task(&remarkTask); 1816 } 1817 1818 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1819 guarantee(has_overflown() || 1820 satb_mq_set.completed_buffers_num() == 0, 1821 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT, 1822 BOOL_TO_STR(has_overflown()), 1823 satb_mq_set.completed_buffers_num()); 1824 1825 print_stats(); 1826 } 1827 1828 void G1ConcurrentMark::flush_all_task_caches() { 1829 size_t hits = 0; 1830 size_t misses = 0; 1831 for (uint i = 0; i < _max_num_tasks; i++) { 1832 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache(); 1833 hits += stats.first; 1834 misses += stats.second; 1835 } 1836 size_t sum = hits + misses; 1837 log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf", 1838 hits, misses, percent_of(hits, sum)); 1839 } 1840 1841 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) { 1842 _prev_mark_bitmap->clear_range(mr); 1843 } 1844 1845 HeapRegion* 1846 G1ConcurrentMark::claim_region(uint worker_id) { 1847 // "checkpoint" the finger 1848 HeapWord* finger = _finger; 1849 1850 while (finger < _heap.end()) { 1851 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 1852 1853 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 1854 // Make sure that the reads below do not float before loading curr_region. 1855 OrderAccess::loadload(); 1856 // Above heap_region_containing may return NULL as we always scan claim 1857 // until the end of the heap. In this case, just jump to the next region. 1858 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 1859 1860 // Is the gap between reading the finger and doing the CAS too long? 1861 HeapWord* res = Atomic::cmpxchg(end, &_finger, finger); 1862 if (res == finger && curr_region != NULL) { 1863 // we succeeded 1864 HeapWord* bottom = curr_region->bottom(); 1865 HeapWord* limit = curr_region->next_top_at_mark_start(); 1866 1867 // notice that _finger == end cannot be guaranteed here since, 1868 // someone else might have moved the finger even further 1869 assert(_finger >= end, "the finger should have moved forward"); 1870 1871 if (limit > bottom) { 1872 return curr_region; 1873 } else { 1874 assert(limit == bottom, 1875 "the region limit should be at bottom"); 1876 // we return NULL and the caller should try calling 1877 // claim_region() again. 
1878 return NULL; 1879 } 1880 } else { 1881 assert(_finger > finger, "the finger should have moved forward"); 1882 // read it again 1883 finger = _finger; 1884 } 1885 } 1886 1887 return NULL; 1888 } 1889 1890 #ifndef PRODUCT 1891 class VerifyNoCSetOops { 1892 G1CollectedHeap* _g1h; 1893 const char* _phase; 1894 int _info; 1895 1896 public: 1897 VerifyNoCSetOops(const char* phase, int info = -1) : 1898 _g1h(G1CollectedHeap::heap()), 1899 _phase(phase), 1900 _info(info) 1901 { } 1902 1903 void operator()(G1TaskQueueEntry task_entry) const { 1904 if (task_entry.is_array_slice()) { 1905 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1906 return; 1907 } 1908 guarantee(oopDesc::is_oop(task_entry.obj()), 1909 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1910 p2i(task_entry.obj()), _phase, _info); 1911 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1912 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1913 p2i(task_entry.obj()), _phase, _info); 1914 } 1915 }; 1916 1917 void G1ConcurrentMark::verify_no_cset_oops() { 1918 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1919 if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) { 1920 return; 1921 } 1922 1923 // Verify entries on the global mark stack 1924 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 1925 1926 // Verify entries on the task queues 1927 for (uint i = 0; i < _max_num_tasks; ++i) { 1928 G1CMTaskQueue* queue = _task_queues->queue(i); 1929 queue->iterate(VerifyNoCSetOops("Queue", i)); 1930 } 1931 1932 // Verify the global finger 1933 HeapWord* global_finger = finger(); 1934 if (global_finger != NULL && global_finger < _heap.end()) { 1935 // Since we always iterate over all regions, we might get a NULL HeapRegion 1936 // here. 1937 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 1938 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 1939 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 1940 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 1941 } 1942 1943 // Verify the task fingers 1944 assert(_num_concurrent_workers <= _max_num_tasks, "sanity"); 1945 for (uint i = 0; i < _num_concurrent_workers; ++i) { 1946 G1CMTask* task = _tasks[i]; 1947 HeapWord* task_finger = task->finger(); 1948 if (task_finger != NULL && task_finger < _heap.end()) { 1949 // See above note on the global finger verification. 1950 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 1951 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 1952 !task_hr->in_collection_set(), 1953 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 1954 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 1955 } 1956 } 1957 } 1958 #endif // PRODUCT 1959 1960 void G1ConcurrentMark::rebuild_rem_set_concurrently() { 1961 _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset); 1962 } 1963 1964 void G1ConcurrentMark::print_stats() { 1965 if (!log_is_enabled(Debug, gc, stats)) { 1966 return; 1967 } 1968 log_debug(gc, stats)("---------------------------------------------------------------------"); 1969 for (size_t i = 0; i < _num_active_tasks; ++i) { 1970 _tasks[i]->print_stats(); 1971 log_debug(gc, stats)("---------------------------------------------------------------------"); 1972 } 1973 } 1974 1975 void G1ConcurrentMark::concurrent_cycle_abort() { 1976 if (!cm_thread()->during_cycle() || _has_aborted) { 1977 // We haven't started a concurrent cycle or we have already aborted it. 
No need to do anything. 1978 return; 1979 } 1980 1981 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 1982 // concurrent bitmap clearing. 1983 { 1984 GCTraceTime(Debug, gc) debug("Clear Next Bitmap"); 1985 clear_bitmap(_next_mark_bitmap, _g1h->workers(), false); 1986 } 1987 // Note we cannot clear the previous marking bitmap here 1988 // since VerifyDuringGC verifies the objects marked during 1989 // a full GC against the previous bitmap. 1990 1991 // Empty mark stack 1992 reset_marking_for_restart(); 1993 for (uint i = 0; i < _max_num_tasks; ++i) { 1994 _tasks[i]->clear_region_fields(); 1995 } 1996 _first_overflow_barrier_sync.abort(); 1997 _second_overflow_barrier_sync.abort(); 1998 _has_aborted = true; 1999 2000 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2001 satb_mq_set.abandon_partial_marking(); 2002 // This can be called either during or outside marking; we'll read 2003 // the expected_active value from the SATB queue set. 2004 satb_mq_set.set_active_all_threads( 2005 false, /* new active value */ 2006 satb_mq_set.is_active() /* expected_active */); 2007 } 2008 2009 static void print_ms_time_info(const char* prefix, const char* name, 2010 NumberSeq& ns) { 2011 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2012 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2013 if (ns.num() > 0) { 2014 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2015 prefix, ns.sd(), ns.maximum()); 2016 } 2017 } 2018 2019 void G1ConcurrentMark::print_summary_info() { 2020 Log(gc, marking) log; 2021 if (!log.is_trace()) { 2022 return; 2023 } 2024 2025 log.trace(" Concurrent marking:"); 2026 print_ms_time_info(" ", "init marks", _init_times); 2027 print_ms_time_info(" ", "remarks", _remark_times); 2028 { 2029 print_ms_time_info(" ", "final marks", _remark_mark_times); 2030 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2031 2032 } 2033 print_ms_time_info(" ", "cleanups", _cleanup_times); 2034 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2035 _total_cleanup_time, (_cleanup_times.num() > 0 ?
_total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2036 log.trace(" Total stop_world time = %8.2f s.", 2037 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2038 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2039 cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum()); 2040 } 2041 2042 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2043 _concurrent_workers->print_worker_threads_on(st); 2044 } 2045 2046 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2047 _concurrent_workers->threads_do(tc); 2048 } 2049 2050 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2051 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2052 p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap)); 2053 _prev_mark_bitmap->print_on_error(st, " Prev Bits: "); 2054 _next_mark_bitmap->print_on_error(st, " Next Bits: "); 2055 } 2056 2057 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2058 ReferenceProcessor* result = g1h->ref_processor_cm(); 2059 assert(result != NULL, "CM reference processor should not be NULL"); 2060 return result; 2061 } 2062 2063 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2064 G1CMTask* task) 2065 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2066 _g1h(g1h), _task(task) 2067 { } 2068 2069 void G1CMTask::setup_for_region(HeapRegion* hr) { 2070 assert(hr != NULL, 2071 "claim_region() should have filtered out NULL regions"); 2072 _curr_region = hr; 2073 _finger = hr->bottom(); 2074 update_region_limit(); 2075 } 2076 2077 void G1CMTask::update_region_limit() { 2078 HeapRegion* hr = _curr_region; 2079 HeapWord* bottom = hr->bottom(); 2080 HeapWord* limit = hr->next_top_at_mark_start(); 2081 2082 if (limit == bottom) { 2083 // The region was collected underneath our feet. 2084 // We set the finger to bottom to ensure that the bitmap 2085 // iteration that will follow this will not do anything. 2086 // (this is not a condition that holds when we set the region up, 2087 // as the region is not supposed to be empty in the first place) 2088 _finger = bottom; 2089 } else if (limit >= _region_limit) { 2090 assert(limit >= _finger, "peace of mind"); 2091 } else { 2092 assert(limit < _region_limit, "only way to get here"); 2093 // This can happen under some pretty unusual circumstances. An 2094 // evacuation pause empties the region underneath our feet (NTAMS 2095 // at bottom). We then do some allocation in the region (NTAMS 2096 // stays at bottom), followed by the region being used as a GC 2097 // alloc region (NTAMS will move to top() and the objects 2098 // originally below it will be grayed). All objects now marked in 2099 // the region are explicitly grayed, if below the global finger, 2100 // and we do not need in fact to scan anything else. So, we simply 2101 // set _finger to be limit to ensure that the bitmap iteration 2102 // doesn't do anything. 2103 _finger = limit; 2104 } 2105 2106 _region_limit = limit; 2107 } 2108 2109 void G1CMTask::giveup_current_region() { 2110 assert(_curr_region != NULL, "invariant"); 2111 clear_region_fields(); 2112 } 2113 2114 void G1CMTask::clear_region_fields() { 2115 // Values for these three fields that indicate that we're not 2116 // holding on to a region. 
2117 _curr_region = NULL; 2118 _finger = NULL; 2119 _region_limit = NULL; 2120 } 2121 2122 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 2123 if (cm_oop_closure == NULL) { 2124 assert(_cm_oop_closure != NULL, "invariant"); 2125 } else { 2126 assert(_cm_oop_closure == NULL, "invariant"); 2127 } 2128 _cm_oop_closure = cm_oop_closure; 2129 } 2130 2131 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) { 2132 guarantee(next_mark_bitmap != NULL, "invariant"); 2133 _next_mark_bitmap = next_mark_bitmap; 2134 clear_region_fields(); 2135 2136 _calls = 0; 2137 _elapsed_time_ms = 0.0; 2138 _termination_time_ms = 0.0; 2139 _termination_start_time_ms = 0.0; 2140 2141 _mark_stats_cache.reset(); 2142 } 2143 2144 bool G1CMTask::should_exit_termination() { 2145 regular_clock_call(); 2146 // This is called when we are in the termination protocol. We should 2147 // quit if, for some reason, this task wants to abort or the global 2148 // stack is not empty (this means that we can get work from it). 2149 return !_cm->mark_stack_empty() || has_aborted(); 2150 } 2151 2152 void G1CMTask::reached_limit() { 2153 assert(_words_scanned >= _words_scanned_limit || 2154 _refs_reached >= _refs_reached_limit, 2155 "shouldn't have been called otherwise"); 2156 regular_clock_call(); 2157 } 2158 2159 void G1CMTask::regular_clock_call() { 2160 if (has_aborted()) { 2161 return; 2162 } 2163 2164 // First, we need to recalculate the words scanned and refs reached 2165 // limits for the next clock call. 2166 recalculate_limits(); 2167 2168 // During the regular clock call we do the following: 2169 2170 // (1) If an overflow has been flagged, then we abort. 2171 if (_cm->has_overflown()) { 2172 set_has_aborted(); 2173 return; 2174 } 2175 2176 // If we are not concurrent (i.e. we're doing remark) we don't need 2177 // to check anything else. The other steps are only needed during 2178 // the concurrent marking phase. 2179 if (!_cm->concurrent()) { 2180 return; 2181 } 2182 2183 // (2) If marking has been aborted for Full GC, then we also abort. 2184 if (_cm->has_aborted()) { 2185 set_has_aborted(); 2186 return; 2187 } 2188 2189 double curr_time_ms = os::elapsedVTime() * 1000.0; 2190 2191 // (3) We check whether we should yield. If we have to, then we abort. 2192 if (SuspendibleThreadSet::should_yield()) { 2193 // We should yield. To do this we abort the task. The caller is 2194 // responsible for yielding. 2195 set_has_aborted(); 2196 return; 2197 } 2198 2199 // (4) We check whether we've reached our time quota. If we have, 2200 // then we abort. 2201 double elapsed_time_ms = curr_time_ms - _start_time_ms; 2202 if (elapsed_time_ms > _time_target_ms) { 2203 set_has_aborted(); 2204 _has_timed_out = true; 2205 return; 2206 } 2207 2208 // (5) Finally, we check whether there are enough completed SATB 2209 // buffers available for processing. If there are, we abort.
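  // (The SATB check below is skipped while _draining_satb_buffers is set:
  // when the task is already inside drain_satb_buffers() the clock must not
  // abort it merely because completed buffers are still queued; see the
  // comment in drain_satb_buffers().)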
2210 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2211 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2212 // we do need to process SATB buffers, we'll abort and restart 2213 // the marking task to do so 2214 set_has_aborted(); 2215 return; 2216 } 2217 } 2218 2219 void G1CMTask::recalculate_limits() { 2220 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2221 _words_scanned_limit = _real_words_scanned_limit; 2222 2223 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2224 _refs_reached_limit = _real_refs_reached_limit; 2225 } 2226 2227 void G1CMTask::decrease_limits() { 2228 // This is called when we believe that we're going to do an infrequent 2229 // operation which will increase the per byte scanned cost (i.e. move 2230 // entries to/from the global stack). It basically tries to decrease the 2231 // scanning limit so that the clock is called earlier. 2232 2233 _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4; 2234 _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4; 2235 } 2236 2237 void G1CMTask::move_entries_to_global_stack() { 2238 // Local array where we'll store the entries that will be popped 2239 // from the local queue. 2240 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2241 2242 size_t n = 0; 2243 G1TaskQueueEntry task_entry; 2244 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) { 2245 buffer[n] = task_entry; 2246 ++n; 2247 } 2248 if (n < G1CMMarkStack::EntriesPerChunk) { 2249 buffer[n] = G1TaskQueueEntry(); 2250 } 2251 2252 if (n > 0) { 2253 if (!_cm->mark_stack_push(buffer)) { 2254 set_has_aborted(); 2255 } 2256 } 2257 2258 // This operation was quite expensive, so decrease the limits. 2259 decrease_limits(); 2260 } 2261 2262 bool G1CMTask::get_entries_from_global_stack() { 2263 // Local array where we'll store the entries that will be popped 2264 // from the global stack. 2265 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2266 2267 if (!_cm->mark_stack_pop(buffer)) { 2268 return false; 2269 } 2270 2271 // We did actually pop at least one entry. 2272 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) { 2273 G1TaskQueueEntry task_entry = buffer[i]; 2274 if (task_entry.is_null()) { 2275 break; 2276 } 2277 assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj())); 2278 bool success = _task_queue->push(task_entry); 2279 // We only call this when the local queue is empty or under a 2280 // given target limit. So, we do not expect this push to fail. 2281 assert(success, "invariant"); 2282 } 2283 2284 // This operation was quite expensive, so decrease the limits 2285 decrease_limits(); 2286 return true; 2287 } 2288 2289 void G1CMTask::drain_local_queue(bool partially) { 2290 if (has_aborted()) { 2291 return; 2292 } 2293 2294 // Decide what the target size is, depending whether we're going to 2295 // drain it partially (so that other tasks can steal if they run out 2296 // of things to do) or totally (at the very end). 
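  // As a worked example (the numbers are illustrative, not the actual
  // defaults): with _task_queue->max_elems() == 16384 and
  // GCDrainStackTargetSize == 64, a partial drain keeps processing entries
  // until at most MIN2(16384 / 3, 64) == 64 remain, leaving some work
  // available for stealing; a total drain uses a target of 0 and empties the
  // queue completely.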
2297 size_t target_size; 2298 if (partially) { 2299 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 2300 } else { 2301 target_size = 0; 2302 } 2303 2304 if (_task_queue->size() > target_size) { 2305 G1TaskQueueEntry entry; 2306 bool ret = _task_queue->pop_local(entry); 2307 while (ret) { 2308 scan_task_entry(entry); 2309 if (_task_queue->size() <= target_size || has_aborted()) { 2310 ret = false; 2311 } else { 2312 ret = _task_queue->pop_local(entry); 2313 } 2314 } 2315 } 2316 } 2317 2318 void G1CMTask::drain_global_stack(bool partially) { 2319 if (has_aborted()) { 2320 return; 2321 } 2322 2323 // We have a policy to drain the local queue before we attempt to 2324 // drain the global stack. 2325 assert(partially || _task_queue->size() == 0, "invariant"); 2326 2327 // Decide what the target size is, depending on whether we're going to 2328 // drain it partially (so that other tasks can steal if they run out 2329 // of things to do) or totally (at the very end). 2330 // Notice that when draining the global mark stack partially, due to the raciness 2331 // of the mark stack size update we might in fact drop below the target. But 2332 // this is not a problem. 2333 // In case of total draining, we simply process until the global mark stack is 2334 // totally empty, disregarding the size counter. 2335 if (partially) { 2336 size_t const target_size = _cm->partial_mark_stack_size_target(); 2337 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 2338 if (get_entries_from_global_stack()) { 2339 drain_local_queue(partially); 2340 } 2341 } 2342 } else { 2343 while (!has_aborted() && get_entries_from_global_stack()) { 2344 drain_local_queue(partially); 2345 } 2346 } 2347 } 2348 2349 // The SATB queue has several assumptions on whether to call the par or 2350 // non-par versions of the methods. This is why some of the code is 2351 // replicated. We should really get rid of the single-threaded version 2352 // of the code to simplify things. 2353 void G1CMTask::drain_satb_buffers() { 2354 if (has_aborted()) { 2355 return; 2356 } 2357 2358 // We set this so that the regular clock knows that we're in the 2359 // middle of draining buffers and doesn't set the abort flag when it 2360 // notices that SATB buffers are available for draining. It'd be 2361 // very counterproductive if it did that. :-) 2362 _draining_satb_buffers = true; 2363 2364 G1CMSATBBufferClosure satb_cl(this, _g1h); 2365 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2366 2367 // This keeps claiming and applying the closure to completed buffers 2368 // until we run out of buffers or we need to abort.
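  // (apply_closure_to_completed_buffer() returns false once there is no
  // completed buffer left to claim, which is what terminates this loop when
  // we are not aborting.)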
2369 while (!has_aborted() && 2370 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2371 regular_clock_call(); 2372 } 2373 2374 _draining_satb_buffers = false; 2375 2376 assert(has_aborted() || 2377 _cm->concurrent() || 2378 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2379 2380 // again, this was a potentially expensive operation, decrease the 2381 // limits to get the regular clock call early 2382 decrease_limits(); 2383 } 2384 2385 void G1CMTask::clear_mark_stats_cache(uint region_idx) { 2386 _mark_stats_cache.reset(region_idx); 2387 } 2388 2389 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() { 2390 return _mark_stats_cache.evict_all(); 2391 } 2392 2393 void G1CMTask::print_stats() { 2394 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls); 2395 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2396 _elapsed_time_ms, _termination_time_ms); 2397 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms", 2398 _step_times_ms.num(), 2399 _step_times_ms.avg(), 2400 _step_times_ms.sd(), 2401 _step_times_ms.maximum(), 2402 _step_times_ms.sum()); 2403 size_t const hits = _mark_stats_cache.hits(); 2404 size_t const misses = _mark_stats_cache.misses(); 2405 log_debug(gc, stats)(" Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f", 2406 hits, misses, percent_of(hits, hits + misses)); 2407 } 2408 2409 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2410 return _task_queues->steal(worker_id, hash_seed, task_entry); 2411 } 2412 2413 /***************************************************************************** 2414 2415 The do_marking_step(time_target_ms, ...) method is the building 2416 block of the parallel marking framework. It can be called in parallel 2417 with other invocations of do_marking_step() on different tasks 2418 (but only one per task, obviously) and concurrently with the 2419 mutator threads, or during remark, hence it eliminates the need 2420 for two versions of the code. When called during remark, it will 2421 pick up from where the task left off during the concurrent marking 2422 phase. Interestingly, tasks are also claimable during evacuation 2423 pauses too, since do_marking_step() ensures that it aborts before 2424 it needs to yield. 2425 2426 The data structures that it uses to do marking work are the 2427 following: 2428 2429 (1) Marking Bitmap. If there are gray objects that appear only 2430 on the bitmap (this happens either when dealing with an overflow 2431 or when the initial marking phase has simply marked the roots 2432 and didn't push them on the stack), then tasks claim heap 2433 regions whose bitmap they then scan to find gray objects. A 2434 global finger indicates where the end of the last claimed region 2435 is. A local finger indicates how far into the region a task has 2436 scanned. The two fingers are used to determine how to gray an 2437 object (i.e. whether simply marking it is OK, as it will be 2438 visited by a task in the future, or whether it needs to be also 2439 pushed on a stack). 2440 2441 (2) Local Queue. The local queue of the task which is accessed 2442 reasonably efficiently by the task. Other tasks can steal from 2443 it when they run out of work. Throughout the marking phase, a 2444 task attempts to keep its local queue short but not totally 2445 empty, so that entries are available for stealing by other 2446 tasks. 
Only when there is no more work will a task totally 2447 drain its local queue. 2448 2449 (3) Global Mark Stack. This handles local queue overflow. During 2450 marking only sets of entries are moved between it and the local 2451 queues, as access to it requires a mutex and more fine-grained 2452 interaction with it, which might cause contention. If it 2453 overflows, then the marking phase should restart and iterate 2454 over the bitmap to identify gray objects. Throughout the marking 2455 phase, tasks attempt to keep the global mark stack at a small 2456 length but not totally empty, so that entries are available for 2457 popping by other tasks. Only when there is no more work will tasks 2458 totally drain the global mark stack. 2459 2460 (4) SATB Buffer Queue. This is where completed SATB buffers are 2461 made available. Buffers are regularly removed from this queue 2462 and scanned for roots, so that the queue doesn't get too 2463 long. During remark, all completed buffers are processed, as 2464 well as the filled-in parts of any uncompleted buffers. 2465 2466 The do_marking_step() method tries to abort when the time target 2467 has been reached. There are a few other cases when the 2468 do_marking_step() method also aborts: 2469 2470 (1) When the marking phase has been aborted (after a Full GC). 2471 2472 (2) When a global overflow (on the global stack) has been 2473 triggered. Before the task aborts, it will actually sync up with 2474 the other tasks to ensure that all the marking data structures 2475 (local queues, stacks, fingers etc.) are re-initialized so that 2476 when do_marking_step() completes, the marking phase can 2477 immediately restart. 2478 2479 (3) When enough completed SATB buffers are available. The 2480 do_marking_step() method only tries to drain SATB buffers right 2481 at the beginning. So, if enough buffers are available, the 2482 marking step aborts and the SATB buffers are processed at 2483 the beginning of the next invocation. 2484 2485 (4) To yield. When we have to yield, we abort and yield 2486 right at the end of do_marking_step(). This saves us from a lot 2487 of hassle as, by yielding, we might allow a Full GC. If this 2488 happens then objects will be compacted underneath our feet, the 2489 heap might shrink, etc. We save checking for this by just 2490 aborting and doing the yield right at the end. 2491 2492 From the above it follows that the do_marking_step() method should 2493 be called in a loop (or, otherwise, regularly) until it completes. 2494 2495 If a marking step completes without its has_aborted() flag being 2496 true, it means it has completed the current marking phase (and 2497 also all other marking tasks have done so and have all synced up). 2498 2499 A method called regular_clock_call() is invoked "regularly" (in 2500 sub-ms intervals) throughout marking. It is this clock method that 2501 checks all the abort conditions which were mentioned above and 2502 decides when the task should abort. A work-based scheme is used to 2503 trigger this clock method: when the number of object words the 2504 marking phase has scanned or the number of references the marking 2505 phase has visited reaches a given limit. Additional invocations of 2506 the clock method have been planted in a few other strategic places 2507 too. The initial reason for the clock method was to avoid calling 2508 vtime too regularly, as it is quite expensive.
So, once it was in 2509 place, it was natural to piggy-back all the other conditions on it 2510 too and not constantly check them throughout the code. 2511 2512 If do_termination is true then do_marking_step will enter its 2513 termination protocol. 2514 2515 The value of is_serial must be true when do_marking_step is being 2516 called serially (i.e. by the VMThread) and do_marking_step should 2517 skip any synchronization in the termination and overflow code. 2518 Examples include the serial remark code and the serial reference 2519 processing closures. 2520 2521 The value of is_serial must be false when do_marking_step is 2522 being called by any of the worker threads in a work gang. 2523 Examples include the concurrent marking code (CMMarkingTask), 2524 the MT remark code, and the MT reference processing closures. 2525 2526 *****************************************************************************/ 2527 2528 void G1CMTask::do_marking_step(double time_target_ms, 2529 bool do_termination, 2530 bool is_serial) { 2531 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2532 2533 _start_time_ms = os::elapsedVTime() * 1000.0; 2534 2535 // If do_stealing is true then do_marking_step will attempt to 2536 // steal work from the other G1CMTasks. It only makes sense to 2537 // enable stealing when the termination protocol is enabled 2538 // and do_marking_step() is not being called serially. 2539 bool do_stealing = do_termination && !is_serial; 2540 2541 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2542 _time_target_ms = time_target_ms - diff_prediction_ms; 2543 2544 // set up the variables that are used in the work-based scheme to 2545 // call the regular clock method 2546 _words_scanned = 0; 2547 _refs_reached = 0; 2548 recalculate_limits(); 2549 2550 // clear all flags 2551 clear_has_aborted(); 2552 _has_timed_out = false; 2553 _draining_satb_buffers = false; 2554 2555 ++_calls; 2556 2557 // Set up the bitmap and oop closures. Anything that uses them is 2558 // eventually called from this method, so it is OK to allocate these 2559 // statically. 2560 G1CMBitMapClosure bitmap_closure(this, _cm); 2561 G1CMOopClosure cm_oop_closure(_g1h, this); 2562 set_cm_oop_closure(&cm_oop_closure); 2563 2564 if (_cm->has_overflown()) { 2565 // This can happen if the mark stack overflows during a GC pause 2566 // and this task, after a yield point, restarts. We have to abort 2567 // as we need to get into the overflow protocol which happens 2568 // right at the end of this task. 2569 set_has_aborted(); 2570 } 2571 2572 // First drain any available SATB buffers. After this, we will not 2573 // look at SATB buffers before the next invocation of this method. 2574 // If enough completed SATB buffers are queued up, the regular clock 2575 // will abort this task so that it restarts. 2576 drain_satb_buffers(); 2577 // ...then partially drain the local queue and the global stack 2578 drain_local_queue(true); 2579 drain_global_stack(true); 2580 2581 do { 2582 if (!has_aborted() && _curr_region != NULL) { 2583 // This means that we're already holding on to a region. 2584 assert(_finger != NULL, "if region is not NULL, then the finger " 2585 "should not be NULL either"); 2586 2587 // We might have restarted this task after an evacuation pause 2588 // which might have evacuated the region we're holding on to 2589 // underneath our feet. 
Let's read its limit again to make sure 2590 // that we do not iterate over a region of the heap that 2591 // contains garbage (update_region_limit() will also move 2592 // _finger to the start of the region if it is found empty). 2593 update_region_limit(); 2594 // We will start from _finger not from the start of the region, 2595 // as we might be restarting this task after aborting half-way 2596 // through scanning this region. In this case, _finger points to 2597 // the address where we last found a marked object. If this is a 2598 // fresh region, _finger points to start(). 2599 MemRegion mr = MemRegion(_finger, _region_limit); 2600 2601 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2602 "humongous regions should go around loop once only"); 2603 2604 // Some special cases: 2605 // If the memory region is empty, we can just give up the region. 2606 // If the current region is humongous then we only need to check 2607 // the bitmap for the bit associated with the start of the object, 2608 // scan the object if it's live, and give up the region. 2609 // Otherwise, let's iterate over the bitmap of the part of the region 2610 // that is left. 2611 // If the iteration is successful, give up the region. 2612 if (mr.is_empty()) { 2613 giveup_current_region(); 2614 regular_clock_call(); 2615 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2616 if (_next_mark_bitmap->is_marked(mr.start())) { 2617 // The object is marked - apply the closure 2618 bitmap_closure.do_addr(mr.start()); 2619 } 2620 // Even if this task aborted while scanning the humongous object 2621 // we can (and should) give up the current region. 2622 giveup_current_region(); 2623 regular_clock_call(); 2624 } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) { 2625 giveup_current_region(); 2626 regular_clock_call(); 2627 } else { 2628 assert(has_aborted(), "currently the only way to do so"); 2629 // The only way to abort the bitmap iteration is to return 2630 // false from the do_bit() method. However, inside the 2631 // do_bit() method we move the _finger to point to the 2632 // object currently being looked at. So, if we bail out, we 2633 // have definitely set _finger to something non-null. 2634 assert(_finger != NULL, "invariant"); 2635 2636 // Region iteration was actually aborted. So now _finger 2637 // points to the address of the object we last scanned. If we 2638 // leave it there, when we restart this task, we will rescan 2639 // the object. It is easy to avoid this. We move the finger by 2640 // enough to point to the next possible object header. 2641 assert(_finger < _region_limit, "invariant"); 2642 HeapWord* const new_finger = _finger + ((oop)_finger)->size(); 2643 // Check if bitmap iteration was aborted while scanning the last object 2644 if (new_finger >= _region_limit) { 2645 giveup_current_region(); 2646 } else { 2647 move_finger_to(new_finger); 2648 } 2649 } 2650 } 2651 // At this point we have either completed iterating over the 2652 // region we were holding on to, or we have aborted. 2653 2654 // We then partially drain the local queue and the global stack. 2655 // (Do we really need this?) 2656 drain_local_queue(true); 2657 drain_global_stack(true); 2658 2659 // Read the note on the claim_region() method on why it might 2660 // return NULL with potentially more regions available for 2661 // claiming and why we have to check out_of_regions() to determine 2662 // whether we're done or not. 
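    // claim_region() advances the global finger with a CAS so that each
    // region between the old and the new finger value is handed to exactly
    // one task. A minimal, self-contained sketch of that claiming protocol
    // (simplified, with illustrative names and std::atomic instead of the
    // HotSpot Atomic class; not the actual implementation):
    //
    //   #include <atomic>
    //   #include <cstddef>
    //
    //   static const size_t RegionWords = 1024;      // illustrative region size
    //   static std::atomic<size_t> g_finger(0);      // global finger, in words
    //
    //   // Claims [start, end) of one region, or returns false once the heap
    //   // (heap_end words) has been completely claimed.
    //   static bool claim_range(size_t heap_end, size_t* start, size_t* end) {
    //     size_t finger = g_finger.load();
    //     while (finger < heap_end) {
    //       size_t region_end = (finger / RegionWords + 1) * RegionWords;
    //       if (g_finger.compare_exchange_strong(finger, region_end)) {
    //         *start = finger;            // we won the race for this region
    //         *end = region_end;
    //         return true;
    //       }
    //       // CAS failed: 'finger' now holds the current value; retry.
    //     }
    //     return false;
    //   }
    //
    // The real claim_region() additionally returns NULL for regions with
    // nothing to scan (next_top_at_mark_start() at bottom()) and lets the
    // caller retry, which is why the loop below keeps calling it until it
    // runs out of regions.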
2663 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2664 // We are going to try to claim a new region. We should have 2665 // given up on the previous one. 2666 // Separated the asserts so that we know which one fires. 2667 assert(_curr_region == NULL, "invariant"); 2668 assert(_finger == NULL, "invariant"); 2669 assert(_region_limit == NULL, "invariant"); 2670 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2671 if (claimed_region != NULL) { 2672 // Yes, we managed to claim one 2673 setup_for_region(claimed_region); 2674 assert(_curr_region == claimed_region, "invariant"); 2675 } 2676 // It is important to call the regular clock here. It might take 2677 // a while to claim a region if, for example, we hit a large 2678 // block of empty regions. So we need to call the regular clock 2679 // method once round the loop to make sure it's called 2680 // frequently enough. 2681 regular_clock_call(); 2682 } 2683 2684 if (!has_aborted() && _curr_region == NULL) { 2685 assert(_cm->out_of_regions(), 2686 "at this point we should be out of regions"); 2687 } 2688 } while ( _curr_region != NULL && !has_aborted()); 2689 2690 if (!has_aborted()) { 2691 // We cannot check whether the global stack is empty, since other 2692 // tasks might be pushing objects to it concurrently. 2693 assert(_cm->out_of_regions(), 2694 "at this point we should be out of regions"); 2695 // Try to reduce the number of available SATB buffers so that 2696 // remark has less work to do. 2697 drain_satb_buffers(); 2698 } 2699 2700 // Since we've done everything else, we can now totally drain the 2701 // local queue and global stack. 2702 drain_local_queue(false); 2703 drain_global_stack(false); 2704 2705 // Attempt at work stealing from other task's queues. 2706 if (do_stealing && !has_aborted()) { 2707 // We have not aborted. This means that we have finished all that 2708 // we could. Let's try to do some stealing... 2709 2710 // We cannot check whether the global stack is empty, since other 2711 // tasks might be pushing objects to it concurrently. 2712 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2713 "only way to reach here"); 2714 while (!has_aborted()) { 2715 G1TaskQueueEntry entry; 2716 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2717 scan_task_entry(entry); 2718 2719 // And since we're towards the end, let's totally drain the 2720 // local queue and global stack. 2721 drain_local_queue(false); 2722 drain_global_stack(false); 2723 } else { 2724 break; 2725 } 2726 } 2727 } 2728 2729 // We still haven't aborted. Now, let's try to get into the 2730 // termination protocol. 2731 if (do_termination && !has_aborted()) { 2732 // We cannot check whether the global stack is empty, since other 2733 // tasks might be concurrently pushing objects on it. 2734 // Separated the asserts so that we know which one fires. 2735 assert(_cm->out_of_regions(), "only way to reach here"); 2736 assert(_task_queue->size() == 0, "only way to reach here"); 2737 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2738 2739 // The G1CMTask class also extends the TerminatorTerminator class, 2740 // hence its should_exit_termination() method will also decide 2741 // whether to exit the termination protocol or not. 
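    // In essence, offer_termination() lets each worker announce that it has
    // run out of work and only returns true once every active worker has made
    // the same offer; while waiting, should_exit_termination() is polled so a
    // worker can withdraw its offer if work reappears. A rough sketch of such
    // a protocol (greatly simplified, with illustrative names; the real
    // terminator also spins, yields and sleeps between checks):
    //
    //   #include <atomic>
    //
    //   class SimpleTerminator {
    //     std::atomic<unsigned> _offered{0};
    //     const unsigned _n_workers;
    //   public:
    //     explicit SimpleTerminator(unsigned n_workers) : _n_workers(n_workers) {}
    //
    //     // should_exit plays the role of should_exit_termination().
    //     template <typename ExitHint>
    //     bool offer_termination(ExitHint should_exit) {
    //       _offered.fetch_add(1);
    //       while (_offered.load() < _n_workers) {
    //         if (should_exit()) {
    //           _offered.fetch_sub(1);    // more work appeared; withdraw the offer
    //           return false;
    //         }
    //       }
    //       return true;                  // everyone offered: the step is done
    //     }
    //   };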
2742 bool finished = (is_serial || 2743 _cm->terminator()->offer_termination(this)); 2744 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2745 _termination_time_ms += 2746 termination_end_time_ms - _termination_start_time_ms; 2747 2748 if (finished) { 2749 // We're all done. 2750 2751 // We can now guarantee that the global stack is empty, since 2752 // all other tasks have finished. We separated the guarantees so 2753 // that, if a condition is false, we can immediately find out 2754 // which one. 2755 guarantee(_cm->out_of_regions(), "only way to reach here"); 2756 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2757 guarantee(_task_queue->size() == 0, "only way to reach here"); 2758 guarantee(!_cm->has_overflown(), "only way to reach here"); 2759 } else { 2760 // Apparently there's more work to do. Let's abort this task. It 2761 // will be restarted and we can hopefully find more things to do. 2762 set_has_aborted(); 2763 } 2764 } 2765 2766 // Mainly for debugging purposes to make sure that a pointer to the 2767 // closure which was statically allocated in this frame doesn't 2768 // escape it by accident. 2769 set_cm_oop_closure(NULL); 2770 double end_time_ms = os::elapsedVTime() * 1000.0; 2771 double elapsed_time_ms = end_time_ms - _start_time_ms; 2772 // Update the step history. 2773 _step_times_ms.add(elapsed_time_ms); 2774 2775 if (has_aborted()) { 2776 // The task was aborted for some reason. 2777 if (_has_timed_out) { 2778 double diff_ms = elapsed_time_ms - _time_target_ms; 2779 // Keep statistics of how well we did with respect to hitting 2780 // our target only if we actually timed out (if we aborted for 2781 // other reasons, then the results might get skewed). 2782 _marking_step_diffs_ms.add(diff_ms); 2783 } 2784 2785 if (_cm->has_overflown()) { 2786 // This is the interesting one. We aborted because a global 2787 // overflow was raised. This means we have to restart the 2788 // marking phase and start iterating over regions. However, in 2789 // order to do this we have to make sure that all tasks stop 2790 // what they are doing and re-initialize in a safe manner. We 2791 // will achieve this with the use of two barrier sync points. 2792 2793 if (!is_serial) { 2794 // We only need to enter the sync barrier if being called 2795 // from a parallel context. 2796 _cm->enter_first_sync_barrier(_worker_id); 2797 2798 // When we exit this sync barrier we know that all tasks have 2799 // stopped doing marking work. So, it's now safe to 2800 // re-initialize our data structures. 2801 } 2802 2803 clear_region_fields(); 2804 flush_mark_stats_cache(); 2805 2806 if (!is_serial) { 2807 // If we're executing the concurrent phase of marking, reset the marking 2808 // state; otherwise the marking state is reset after reference processing, 2809 // during the remark pause. 2810 // If we reset here as a result of an overflow during the remark we will 2811 // see assertion failures from any subsequent set_concurrency_and_phase() 2812 // calls. 2813 if (_cm->concurrent() && _worker_id == 0) { 2814 // Worker 0 is responsible for clearing the global data structures because 2815 // of an overflow. During STW we should not clear the overflow flag (in 2816 // G1ConcurrentMark::reset_marking_for_restart()) since we rely on it being true when we exit 2817 // this method to abort the pause and restart concurrent marking. 2818 _cm->reset_marking_for_restart(); 2819 2820 log_info(gc, marking)("Concurrent Mark reset for overflow"); 2821 } 2822 2823 // ...and enter the second barrier.
2824 _cm->enter_second_sync_barrier(_worker_id); 2825 } 2826 // At this point, if we're during the concurrent phase of 2827 // marking, everything has been re-initialized and we're 2828 // ready to restart. 2829 } 2830 } 2831 } 2832 2833 G1CMTask::G1CMTask(uint worker_id, 2834 G1ConcurrentMark* cm, 2835 G1CMTaskQueue* task_queue, 2836 G1RegionMarkStats* mark_stats, 2837 uint max_regions) : 2838 _objArray_processor(this), 2839 _worker_id(worker_id), 2840 _g1h(G1CollectedHeap::heap()), 2841 _cm(cm), 2842 _next_mark_bitmap(NULL), 2843 _task_queue(task_queue), 2844 _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize), 2845 _calls(0), 2846 _time_target_ms(0.0), 2847 _start_time_ms(0.0), 2848 _cm_oop_closure(NULL), 2849 _curr_region(NULL), 2850 _finger(NULL), 2851 _region_limit(NULL), 2852 _words_scanned(0), 2853 _words_scanned_limit(0), 2854 _real_words_scanned_limit(0), 2855 _refs_reached(0), 2856 _refs_reached_limit(0), 2857 _real_refs_reached_limit(0), 2858 _hash_seed(17), 2859 _has_aborted(false), 2860 _has_timed_out(false), 2861 _draining_satb_buffers(false), 2862 _step_times_ms(), 2863 _elapsed_time_ms(0.0), 2864 _termination_time_ms(0.0), 2865 _termination_start_time_ms(0.0), 2866 _marking_step_diffs_ms() 2867 { 2868 guarantee(task_queue != NULL, "invariant"); 2869 2870 _marking_step_diffs_ms.add(0.5); 2871 } 2872 2873 // These are formatting macros that are used below to ensure 2874 // consistent formatting. The *_H_* versions are used to format the 2875 // header for a particular value and they should be kept consistent 2876 // with the corresponding macro. Also note that most of the macros add 2877 // the necessary white space (as a prefix) which makes them a bit 2878 // easier to compose. 2879 2880 // All the output lines are prefixed with this string to be able to 2881 // identify them easily in a large log file. 2882 #define G1PPRL_LINE_PREFIX "###" 2883 2884 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 2885 #ifdef _LP64 2886 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 2887 #else // _LP64 2888 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 2889 #endif // _LP64 2890 2891 // For per-region info 2892 #define G1PPRL_TYPE_FORMAT " %-4s" 2893 #define G1PPRL_TYPE_H_FORMAT " %4s" 2894 #define G1PPRL_STATE_FORMAT " %-5s" 2895 #define G1PPRL_STATE_H_FORMAT " %5s" 2896 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 2897 #define G1PPRL_BYTE_H_FORMAT " %9s" 2898 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 2899 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 2900 2901 // For summary info 2902 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 2903 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 2904 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 2905 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 2906 2907 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) : 2908 _total_used_bytes(0), _total_capacity_bytes(0), 2909 _total_prev_live_bytes(0), _total_next_live_bytes(0), 2910 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) 2911 { 2912 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2913 MemRegion g1_reserved = g1h->g1_reserved(); 2914 double now = os::elapsedTime(); 2915 2916 // Print the header of the output. 
2917 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 2918 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 2919 G1PPRL_SUM_ADDR_FORMAT("reserved") 2920 G1PPRL_SUM_BYTE_FORMAT("region-size"), 2921 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 2922 HeapRegion::GrainBytes); 2923 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2924 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2925 G1PPRL_TYPE_H_FORMAT 2926 G1PPRL_ADDR_BASE_H_FORMAT 2927 G1PPRL_BYTE_H_FORMAT 2928 G1PPRL_BYTE_H_FORMAT 2929 G1PPRL_BYTE_H_FORMAT 2930 G1PPRL_DOUBLE_H_FORMAT 2931 G1PPRL_BYTE_H_FORMAT 2932 G1PPRL_STATE_H_FORMAT 2933 G1PPRL_BYTE_H_FORMAT, 2934 "type", "address-range", 2935 "used", "prev-live", "next-live", "gc-eff", 2936 "remset", "state", "code-roots"); 2937 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2938 G1PPRL_TYPE_H_FORMAT 2939 G1PPRL_ADDR_BASE_H_FORMAT 2940 G1PPRL_BYTE_H_FORMAT 2941 G1PPRL_BYTE_H_FORMAT 2942 G1PPRL_BYTE_H_FORMAT 2943 G1PPRL_DOUBLE_H_FORMAT 2944 G1PPRL_BYTE_H_FORMAT 2945 G1PPRL_STATE_H_FORMAT 2946 G1PPRL_BYTE_H_FORMAT, 2947 "", "", 2948 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 2949 "(bytes)", "", "(bytes)"); 2950 } 2951 2952 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) { 2953 const char* type = r->get_type_str(); 2954 HeapWord* bottom = r->bottom(); 2955 HeapWord* end = r->end(); 2956 size_t capacity_bytes = r->capacity(); 2957 size_t used_bytes = r->used(); 2958 size_t prev_live_bytes = r->live_bytes(); 2959 size_t next_live_bytes = r->next_live_bytes(); 2960 double gc_eff = r->gc_efficiency(); 2961 size_t remset_bytes = r->rem_set()->mem_size(); 2962 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 2963 const char* remset_type = r->rem_set()->get_short_state_str(); 2964 2965 _total_used_bytes += used_bytes; 2966 _total_capacity_bytes += capacity_bytes; 2967 _total_prev_live_bytes += prev_live_bytes; 2968 _total_next_live_bytes += next_live_bytes; 2969 _total_remset_bytes += remset_bytes; 2970 _total_strong_code_roots_bytes += strong_code_roots_bytes; 2971 2972 // Print a line for this particular region. 2973 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2974 G1PPRL_TYPE_FORMAT 2975 G1PPRL_ADDR_BASE_FORMAT 2976 G1PPRL_BYTE_FORMAT 2977 G1PPRL_BYTE_FORMAT 2978 G1PPRL_BYTE_FORMAT 2979 G1PPRL_DOUBLE_FORMAT 2980 G1PPRL_BYTE_FORMAT 2981 G1PPRL_STATE_FORMAT 2982 G1PPRL_BYTE_FORMAT, 2983 type, p2i(bottom), p2i(end), 2984 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 2985 remset_bytes, remset_type, strong_code_roots_bytes); 2986 2987 return false; 2988 } 2989 2990 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 2991 // add static memory usages to remembered set sizes 2992 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 2993 // Print the footer of the output. 
2994 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2995 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2996 " SUMMARY" 2997 G1PPRL_SUM_MB_FORMAT("capacity") 2998 G1PPRL_SUM_MB_PERC_FORMAT("used") 2999 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3000 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3001 G1PPRL_SUM_MB_FORMAT("remset") 3002 G1PPRL_SUM_MB_FORMAT("code-roots"), 3003 bytes_to_mb(_total_capacity_bytes), 3004 bytes_to_mb(_total_used_bytes), 3005 percent_of(_total_used_bytes, _total_capacity_bytes), 3006 bytes_to_mb(_total_prev_live_bytes), 3007 percent_of(_total_prev_live_bytes, _total_capacity_bytes), 3008 bytes_to_mb(_total_next_live_bytes), 3009 percent_of(_total_next_live_bytes, _total_capacity_bytes), 3010 bytes_to_mb(_total_remset_bytes), 3011 bytes_to_mb(_total_strong_code_roots_bytes)); 3012 }