/*
 * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/shared/adaptiveSizePolicy.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "gc/shared/weakProcessor.hpp"
#include "include/jvm.h"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/access.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
#include "utilities/growableArray.hpp"

bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();

  return true;
}

size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}

bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}

void G1CMMarkStack::expand() {
  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                    old_capacity, new_capacity);
  }
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }
}

void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
  elem->next = *list;
  *list = elem;
}
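
// add_chunk_to_list() above (and remove_chunk_from_list() below) mutate the
// lists without any synchronization of their own; callers serialize on
// MarkStackChunkList_lock respectively MarkStackFreeList_lock, as the
// add_chunk_to_*_list() / remove_chunk_from_*_list() wrappers below do.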

void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
  TaskQueueEntryChunk* result = *list;
  if (result != NULL) {
    *list = (*list)->next;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != NULL) {
    _chunks_in_chunk_list--;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
  // wraparound of _hwm.
  if (_hwm >= _chunk_capacity) {
    return NULL;
  }

  size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
  if (cur_idx >= _chunk_capacity) {
    return NULL;
  }

  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
  result->next = NULL;
  return result;
}

bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == NULL) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}
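
// Illustrative use of the chunked overflow stack above (hypothetical locals;
// only par_push_chunk()/par_pop_chunk() and EntriesPerChunk are from this file):
//
//   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
//   // spill one chunk's worth of entries from an overfull local task queue:
//   if (!mark_stack->par_push_chunk(buffer)) {
//     // Free list and backing memory are exhausted: raise the overflow flag
//     // and restart marking with an expanded stack (see expand()).
//   }
//   // later, refill an empty local queue a whole chunk at a time:
//   if (mark_stack->par_pop_chunk(buffer)) {
//     // ... push the EntriesPerChunk entries onto the local queue ...
//   }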

G1CMRootRegions::G1CMRootRegions() :
  _survivors(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}

void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) {
    return false;
  }

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

// Returns the maximum number of workers to be used in a concurrent
// phase based on the number of GC workers being used in a STW
// phase.
static uint scale_concurrent_worker_threads(uint num_gc_workers) {
  return MAX2((num_gc_workers + 2) / 4, 1U);
}
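
// Worked example of the scaling above (integer division):
//   ParallelGCThreads = 8 -> (8 + 2) / 4 = 2 concurrent workers
//   ParallelGCThreads = 4 -> (4 + 2) / 4 = 1 concurrent worker
//   ParallelGCThreads = 1 -> MAX2(0, 1U) = 1 concurrent worker (the floor)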

G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* prev_bitmap_storage,
                                   G1RegionToSpaceMapper* next_bitmap_storage) :
  // _cm_thread set inside the constructor
  _g1h(g1h),
  _completed_initialization(false),

  _mark_bitmap_1(),
  _mark_bitmap_2(),
  _prev_mark_bitmap(&_mark_bitmap_1),
  _next_mark_bitmap(&_mark_bitmap_2),

  _heap(_g1h->reserved_region()),

  _root_regions(),

  _global_mark_stack(),

  // _finger set in set_non_marking_state

  _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
  _max_num_tasks(ParallelGCThreads),
  // _num_active_tasks set in set_non_marking_state()
  // _tasks set inside the constructor

  _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
  _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),
  _total_cleanup_time(0.0),

  _accum_task_vtime(NULL),

  _concurrent_workers(NULL),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
{
  _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "CGC_lock must be initialized");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
    // Calculate the number of concurrent worker threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
  }

  assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _num_concurrent_workers = ConcGCThreads;
  _max_concurrent_workers = _num_concurrent_workers;

  _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
  _concurrent_workers->initialize_workers();

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());

    _accum_task_vtime[i] = 0.0;
  }

  reset_at_marking_complete();
  _completed_initialization = true;
}
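
// A note on the ergonomic MarkStackSize selection in the constructor above:
// when the flag is left at its default, the chosen size is
//   MIN2(MarkStackSizeMax, MAX2(MarkStackSize, _max_concurrent_workers * TASKQUEUE_SIZE)),
// i.e. the global overflow stack is sized to hold at least one local task
// queue's worth of entries per concurrent worker (hypothetical example:
// 4 workers give a floor of 4 * TASKQUEUE_SIZE entries).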

void G1ConcurrentMark::reset() {
  _has_aborted = false;

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use a different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(_next_mark_bitmap);
  }

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < max_regions; i++) {
    _top_at_rebuild_starts[i] = NULL;
    _region_mark_stats[i].clear();
  }
}

void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
  for (uint j = 0; j < _max_num_tasks; ++j) {
    _tasks[j]->clear_mark_stats_cache(region_idx);
  }
  _top_at_rebuild_starts[region_idx] = NULL;
  _region_mark_stats[region_idx].clear();
}

void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
  uint const region_idx = r->hrm_index();
  if (r->is_humongous()) {
    assert(r->is_starts_humongous(), "Got humongous continues region here");
    uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
    for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
      clear_statistics_in_region(j);
    }
  } else {
    clear_statistics_in_region(region_idx);
  }
}

static void clear_mark_if_set(G1CMBitMap* bitmap, HeapWord* addr) {
  if (bitmap->is_marked(addr)) {
    bitmap->clear(addr);
  }
}

void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
  assert_at_safepoint_on_vm_thread();

  // Need to clear all mark bits of the humongous object.
  clear_mark_if_set(_prev_mark_bitmap, r->bottom());
  clear_mark_if_set(_next_mark_bitmap, r->bottom());

  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  clear_statistics(r);
}

void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    uint max_regions = _g1h->max_regions();
    for (uint i = 0; i < max_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  _finger = _heap.start();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap.end()));
  }
}

void G1ConcurrentMark::reset_at_marking_complete() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}

G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
    }

    virtual bool do_heap_region(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }

  bool is_complete() {
    return _cl.is_complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}
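
// Sizing sketch for clear_bitmap() above, assuming the usual 64 bit build
// where one mark bit covers one 8-byte heap word (heap_map_factor() == 64):
// a 1 GB heap has a 16 MB mark bitmap, which at the 1 MB chunk_size() yields
// 16 work units and therefore at most 16 usefully busy workers.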

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cm_thread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");

  clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);

  // Repeat the asserts from above.
  guarantee(cm_thread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert_at_safepoint_on_vm_thread();
  clear_bitmap(_prev_mark_bitmap, workers, false);
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool do_heap_region(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
  CheckBitmapClearHRClosure cl(_next_mark_bitmap);
  _g1h->heap_region_iterate(&cl);
  return cl.is_complete();
}

class NoteStartOfMarkHRClosure : public HeapRegionClosure {
public:
  bool do_heap_region(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::pre_initial_mark() {
  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  _g1h->heap_region_iterate(&startcl);
}

void G1ConcurrentMark::post_initial_mark() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, and we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}
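
// Illustrative sketch (not code from this file): with the SATB queues active,
// the mutator's pre-write barrier conceptually performs
//
//   oop old = *field;                   // value at snapshot time
//   if (old != NULL) satb_enqueue(old); // into the thread-local SATB queue
//   *field = new_value;
//
// where 'satb_enqueue' is a placeholder name. The queued oops are drained
// during marking and finally by G1RemarkThreadsClosure (further down in this
// file), preserving the snapshot-at-the-beginning invariant: every object
// live at the start of marking is eventually marked.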

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or an evacuation
 * pause could occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class G1CMConcurrentMarkingTask : public AbstractGangTask {
  G1ConcurrentMark* _cm;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");

      G1CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          task->do_marking_step(G1ConcMarkStepDurationMillis,
                                true  /* do_termination */,
                                false /* is_serial */);

          _cm->do_yield_check();
        } while (!_cm->has_aborted() && task->has_aborted());
      }
      task->record_end_time();
      guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
    AbstractGangTask("Concurrent Mark"), _cm(cm) { }

  ~G1CMConcurrentMarkingTask() { }
};

uint G1ConcurrentMark::calc_active_marking_workers() {
  uint result = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    result = _max_concurrent_workers;
  } else {
    result =
      AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
                                                      1, /* Minimum workers */
                                                      _num_concurrent_workers,
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale the result down by scale_concurrent_workers() because
    // that scaling has already gone into "_max_concurrent_workers".
  }
  assert(result > 0 && result <= _max_concurrent_workers,
         "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
         _max_concurrent_workers, result);
  return result;
}

void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
  G1ConcurrentMark* _cm;
public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scan_root_region(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _num_concurrent_workers = MIN2(calc_active_marking_workers(),
                                   // We distribute work on a per-region basis, so starting
                                   // more threads than that is useless.
                                   root_regions()->num_root_regions());
    assert(_num_concurrent_workers <= _max_concurrent_workers,
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
    _concurrent_workers->run_task(&task, _num_concurrent_workers);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->collector_state()->set_clearing_next_bitmap(false);

  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow = false;

  _num_concurrent_workers = calc_active_marking_workers();

  uint active_workers = MAX2(1U, _num_concurrent_workers);

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}

void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(type, vo, buffer);
  }

  verifier->check_bitmaps(caller);
}

class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;

  G1PrintRegionLivenessInfoClosure _cl;

  uint _num_regions_selected_for_rebuild;  // The number of regions actually selected for rebuild.

  void update_remset_before_rebuild(HeapRegion* hr) {
    G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker();

    size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize;
    bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
    if (selected_for_rebuild) {
      _num_regions_selected_for_rebuild++;
    }
    _cm->update_top_at_rebuild_start(hr);
  }

  void distribute_marked_bytes(HeapRegion* hr, size_t marked_words) {
    uint const region_idx = hr->hrm_index();
    uint num_regions_in_humongous = (uint)G1CollectedHeap::humongous_obj_size_in_regions(marked_words);

    for (uint i = region_idx; i < (region_idx + num_regions_in_humongous); i++) {
      HeapRegion* const r = _g1h->region_at(i);
      size_t const words_to_add = MIN2(HeapRegion::GrainWords, marked_words);
      assert(words_to_add > 0, "Out of space to distribute before end of humongous object in region %u (starts %u)", i, region_idx);

      log_trace(gc, marking)("Adding " SIZE_FORMAT " words to humongous region %u (%s)",
                             words_to_add, i, r->get_type_str());
      r->add_to_marked_bytes(words_to_add * HeapWordSize);
      marked_words -= words_to_add;
    }
    assert(marked_words == 0,
           SIZE_FORMAT " words left after distributing space across %u regions",
           marked_words, num_regions_in_humongous);
  }
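
  // Worked example for distribute_marked_bytes() above: a humongous object of
  // 2.5 * HeapRegion::GrainWords live words spans three regions. The loop adds
  // GrainWords to each of the first two regions and the remaining
  // 0.5 * GrainWords to the third, leaving marked_words == 0 as asserted.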

  void update_marked_bytes(HeapRegion* hr) {
    uint const region_idx = hr->hrm_index();
    size_t marked_words = _cm->liveness(region_idx);
    // The marking attributes the object's size completely to the humongous starts
    // region. We need to distribute this value across the entire set of regions a
    // humongous object spans.
    if (hr->is_humongous()) {
      assert(hr->is_starts_humongous() || marked_words == 0,
             "Should not have marked words " SIZE_FORMAT " in non-starts humongous region %u (%s)",
             marked_words, region_idx, hr->get_type_str());

      if (marked_words > 0) {
        distribute_marked_bytes(hr, marked_words);
      }
    } else {
      log_trace(gc, marking)("Adding " SIZE_FORMAT " words to region %u (%s)", marked_words, region_idx, hr->get_type_str());
      hr->add_to_marked_bytes(marked_words * HeapWordSize);
    }
  }

public:
  G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm) :
    _g1h(g1h), _cm(cm), _cl("Post-Marking"), _num_regions_selected_for_rebuild(0) { }

  virtual bool do_heap_region(HeapRegion* r) {
    update_remset_before_rebuild(r);
    update_marked_bytes(r);
    if (log_is_enabled(Trace, gc, liveness)) {
      _cl.do_heap_region(r);
    }
    r->note_end_of_marking();
    return false;
  }

  uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
};

class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
  G1CollectedHeap* _g1h;
public:
  G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  virtual bool do_heap_region(HeapRegion* r) {
    _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r);
    return false;
  }
};

void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* g1p = _g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work(false /* clear_all_soft_refs */);

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, and we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
      flush_all_task_caches();
    }

    // Install newly created mark bitmap as "prev".
    swap_mark_bitmaps();
    {
      GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking Before Rebuild", _gc_timer_cm);
      G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this);
      _g1h->heap_region_iterate(&cl);
      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_regions(), cl.num_selected_for_rebuild());
    }
    {
      GCTraceTime(Debug, gc, phases) debug("Reclaim Empty Regions", _gc_timer_cm);
      reclaim_empty_regions();
    }

    // Clean out dead classes
    if (ClassUnloadingWithConcurrentMark) {
      GCTraceTime(Debug, gc, phases) debug("Purge Metaspace", _gc_timer_cm);
      ClassLoaderDataGraph::purge();
    }

    compute_new_sizes();

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark after");

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    reset_at_marking_complete();
  } else {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  {
    GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
    report_object_count(mark_finished);
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}
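
// A note on the reclaim criterion used by the task below: a region is
// considered "empty" when it has allocated bytes (used() > 0) but marking
// found no live data at all (max_live_bytes() == 0), and it is neither young
// nor archive. Such regions can be freed wholesale without evacuation.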

class G1ReclaimEmptyRegionsTask : public AbstractGangTask {
  // Per-region work during the Cleanup pause.
  class G1ReclaimEmptyRegionsClosure : public HeapRegionClosure {
    G1CollectedHeap* _g1h;
    size_t _freed_bytes;
    FreeRegionList* _local_cleanup_list;
    uint _old_regions_removed;
    uint _humongous_regions_removed;
    HRRSCleanupTask* _hrrs_cleanup_task;

  public:
    G1ReclaimEmptyRegionsClosure(G1CollectedHeap* g1h,
                                 FreeRegionList* local_cleanup_list,
                                 HRRSCleanupTask* hrrs_cleanup_task) :
      _g1h(g1h),
      _freed_bytes(0),
      _local_cleanup_list(local_cleanup_list),
      _old_regions_removed(0),
      _humongous_regions_removed(0),
      _hrrs_cleanup_task(hrrs_cleanup_task) { }

    size_t freed_bytes() { return _freed_bytes; }
    const uint old_regions_removed() { return _old_regions_removed; }
    const uint humongous_regions_removed() { return _humongous_regions_removed; }

    bool do_heap_region(HeapRegion* hr) {
      if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
        _freed_bytes += hr->used();
        hr->set_containing_set(NULL);
        if (hr->is_humongous()) {
          _humongous_regions_removed++;
          _g1h->free_humongous_region(hr, _local_cleanup_list);
        } else {
          _old_regions_removed++;
          _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */);
        }
        hr->clear_cardtable();
        _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
        log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
      } else {
        hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
      }

      return false;
    }
  };

  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ReclaimEmptyRegionsTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
    AbstractGangTask("G1 Cleanup"),
    _g1h(g1h),
    _cleanup_list(cleanup_list),
    _hrclaimer(n_workers) {

    HeapRegionRemSet::reset_for_cleanup_tasks();
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1ReclaimEmptyRegionsClosure cl(_g1h,
                                    &local_cleanup_list,
                                    &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
    assert(cl.is_complete(), "Shouldn't have aborted!");

    // Now update the old/humongous region sets
    _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(cl.freed_bytes());

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

void G1ConcurrentMark::reclaim_empty_regions() {
  WorkGang* workers = _g1h->workers();
  FreeRegionList empty_regions_list("Empty Regions After Mark List");

  G1ReclaimEmptyRegionsTask cl(_g1h, &empty_regions_list, workers->active_workers());
  workers->run_task(&cl);

  if (!empty_regions_list.is_empty()) {
    log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
    // Now print the empty regions list.
    G1HRPrinter* hrp = _g1h->hr_printer();
    if (hrp->is_active()) {
      FreeRegionListIterator iter(&empty_regions_list);
      while (iter.more_available()) {
        HeapRegion* hr = iter.get_next();
        hrp->cleanup(hr);
      }
    }
    // And actually make them available.
    _g1h->prepend_to_freelist(&empty_regions_list);
  }
}

void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->g1mm()->update_sizes();
}

void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* g1p = _g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup before");

  {
    GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
    G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  }

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
    _g1h->heap_region_iterate(&cl);
  }

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for Cleanup to finish.
  _g1h->increment_total_collections();

  // Local statistics
  double recent_cleanup_time = (os::elapsedTime() - start);
  _total_cleanup_time += recent_cleanup_time;
  _cleanup_times.add(recent_cleanup_time);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
    _g1h->g1_policy()->record_concurrent_mark_cleanup_end();
  }
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking.

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a worker thread (for serial reference
// processing the G1CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  int               _ref_counter_limit;
  int               _ref_counter;
  bool              _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _is_serial(is_serial) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      _task->deal_with_reference(p);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  bool              _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking.

class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap*  _g1h;
  G1ConcurrentMark* _cm;
  WorkGang*         _workers;
  uint              _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          G1ConcurrentMark* cm,
                          WorkGang* workers,
                          uint n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy : public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask& _proc_task;
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       G1ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    ResourceMark rm;
    HandleMark hm;
    G1CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&proc_task_proxy);
}

class G1CMRefEnqueueTaskProxy : public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&enq_task_proxy);
}

void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark hm;

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(_g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Set the soft reference policy
    rp->setup_policy(clear_all_soft_refs);
    assert(_global_mark_stack.is_empty(), "mark stack should be empty");

    // Instances of the 'Keep Alive' and 'Complete GC' closures used
    // in serial reference processing. Note these closures are also
    // used for serially processing (by the current thread) the
    // JNI references during parallel reference processing.
    //
    // These closures do not need to synchronize with the worker
    // threads involved in parallel reference processing as these
    // instances are executed serially by the current thread (e.g.
    // reference processing is not multi-threaded and is thus
    // performed by the current thread instead of a gang worker).
    //
    // The gang tasks involved in parallel reference processing create
    // their own instances of these closures, which do their own
    // synchronization among themselves.
    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);

    // We need at least one active thread. If reference processing
    // is not multi-threaded we use the current (VMThread) thread,
    // otherwise we use the work gang from the G1CollectedHeap and
    // we utilize all the worker threads we can.
    bool processing_is_mt = rp->processing_is_mt();
    uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
    active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);

    // Parallel processing task executor.
1606     G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
1607                                               _g1h->workers(), active_workers);
1608     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1609
1610     // Set the concurrency level. The phase was already set prior to
1611     // executing the remark task.
1612     set_concurrency(active_workers);
1613
1614     // Set the degree of MT processing here. If the discovery was done MT,
1615     // the number of threads involved during discovery could differ from
1616     // the number of active workers. This is OK as long as the discovered
1617     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1618     rp->set_active_mt_degree(active_workers);
1619
1620     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1621
1622     // Process the weak references.
1623     const ReferenceProcessorStats& stats =
1624         rp->process_discovered_references(&g1_is_alive,
1625                                           &g1_keep_alive,
1626                                           &g1_drain_mark_stack,
1627                                           executor,
1628                                           &pt);
1629     _gc_tracer_cm->report_gc_reference_stats(stats);
1630     pt.print_all_references();
1631
1632     // The do_oop work routines of the keep_alive and drain_marking_stack
1633     // oop closures will set the has_overflown flag if we overflow the
1634     // global marking stack.
1635
1636     assert(has_overflown() || _global_mark_stack.is_empty(),
1637            "Mark stack should be empty (unless it has overflown)");
1638
1639     assert(rp->num_q() == active_workers, "why not");
1640
1641     rp->enqueue_discovered_references(executor, &pt);
1642
1643     rp->verify_no_references_recorded();
1644
1645     pt.print_enqueue_phase();
1646
1647     assert(!rp->discovery_enabled(), "Post condition");
1648   }
1649
1650   assert(has_overflown() || _global_mark_stack.is_empty(),
1651          "Mark stack should be empty (unless it has overflown)");
1652
1653   {
1654     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1655     WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
1656   }
1657
1658   if (has_overflown()) {
1659     // We cannot trust g1_is_alive if the marking stack overflowed
1660     return;
1661   }
1662
1663   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1664
1665   // Unload Klasses, String, Symbols, Code Cache, etc.
1666   if (ClassUnloadingWithConcurrentMark) {
1667     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1668     bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1669     _g1h->complete_cleaning(&g1_is_alive, purged_classes);
1670   } else {
1671     GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1672     // No need to clean string table and symbol table as they are treated as strong roots when
1673     // class unloading is disabled.
1674     _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1675   }
1676 }
1677
1678 // When sampling object counts, we already swapped the mark bitmaps, so we need to use
1679 // the prev bitmap to determine liveness.
1680 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1681   G1CollectedHeap* _g1h;
1682 public:
1683   G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) { }
1684
1685   bool do_object_b(oop obj) {
1686     HeapWord* addr = (HeapWord*)obj;
1687     return addr != NULL &&
1688            (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_dead(obj));
1689   }
1690 };
1691
1692 void G1ConcurrentMark::report_object_count(bool mark_completed) {
1693   // Depending on whether marking has completed, liveness needs to be
1694   // determined using either the next or the prev bitmap.
1695   if (mark_completed) {
1696     G1ObjectCountIsAliveClosure is_alive(_g1h);
1697     _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1698   } else {
1699     G1CMIsAliveClosure is_alive(_g1h);
1700     _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1701   }
1702 }
1703
1704
1705 void G1ConcurrentMark::swap_mark_bitmaps() {
1706   G1CMBitMap* temp = _prev_mark_bitmap;
1707   _prev_mark_bitmap = _next_mark_bitmap;
1708   _next_mark_bitmap = temp;
1709   _g1h->collector_state()->set_clearing_next_bitmap(true);
1710 }
1711
1712 // Closure for marking entries in SATB buffers.
1713 class G1CMSATBBufferClosure : public SATBBufferClosure {
1714 private:
1715   G1CMTask* _task;
1716   G1CollectedHeap* _g1h;
1717
1718   // This is very similar to G1CMTask::deal_with_reference, but with
1719   // more relaxed requirements for the argument, so this must be more
1720   // circumspect about treating the argument as an object.
1721   void do_entry(void* entry) const {
1722     _task->increment_refs_reached();
1723     oop const obj = static_cast<oop>(entry);
1724     _task->make_reference_grey(obj);
1725   }
1726
1727 public:
1728   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1729     : _task(task), _g1h(g1h) { }
1730
1731   virtual void do_buffer(void** buffer, size_t size) {
1732     for (size_t i = 0; i < size; ++i) {
1733       do_entry(buffer[i]);
1734     }
1735   }
1736 };
1737
1738 class G1RemarkThreadsClosure : public ThreadClosure {
1739   G1CMSATBBufferClosure _cm_satb_cl;
1740   G1CMOopClosure _cm_cl;
1741   MarkingCodeBlobClosure _code_cl;
1742   int _thread_parity;
1743
1744 public:
1745   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1746     _cm_satb_cl(task, g1h),
1747     _cm_cl(g1h, task),
1748     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1749     _thread_parity(Threads::thread_claim_parity()) {}
1750
1751   void do_thread(Thread* thread) {
1752     if (thread->is_Java_thread()) {
1753       if (thread->claim_oops_do(true, _thread_parity)) {
1754         JavaThread* jt = (JavaThread*)thread;
1755
1756         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1757         // however, oops reachable from nmethods have very complex lifecycles:
1758         // * Alive if on the stack of an executing method
1759         // * Weakly reachable otherwise
1760         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1761         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1762         jt->nmethods_do(&_code_cl);
1763
1764         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1765       }
1766     } else if (thread->is_VM_thread()) {
1767       if (thread->claim_oops_do(true, _thread_parity)) {
1768         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1769       }
1770     }
1771   }
1772 };
1773
1774 class G1CMRemarkTask : public AbstractGangTask {
1775   G1ConcurrentMark* _cm;
1776 public:
1777   void work(uint worker_id) {
1778     G1CMTask* task = _cm->task(worker_id);
1779     task->record_start_time();
1780     {
1781       ResourceMark rm;
1782       HandleMark hm;
1783
1784       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1785       Threads::threads_do(&threads_f);
1786     }
1787
1788     do {
1789       task->do_marking_step(1000000000.0 /* something very large */,
1790                             true /* do_termination */,
1791                             false /* is_serial */);
1792     } while (task->has_aborted() && !_cm->has_overflown());
1793     // If we overflow, then we do not want to restart. We instead
1794     // want to abort remark and do concurrent marking again.
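//
// The claim_oops_do(true, _thread_parity) calls in G1RemarkThreadsClosure
// above rely on a claim parity: a global parity value is flipped once per
// pass, and a worker owns a thread for the pass only if it is the one that
// installs the new parity into that thread's claim word. A minimal sketch
// of the idiom (illustration only, not HotSpot code; std::atomic stands in
// for the VM's atomics):
//
//   #include <atomic>
//   #include <cstdio>
//
//   struct ClaimableThread {
//     std::atomic<int> claimed_parity{0};
//     // True only for the single worker that swings the parity forward.
//     bool try_claim(int global_parity) {
//       int expected = claimed_parity.load();
//       return expected != global_parity &&
//              claimed_parity.compare_exchange_strong(expected, global_parity);
//     }
//   };
//
//   int main() {
//     int global_parity = 1;              // flipped once per remark pass
//     ClaimableThread t;
//     std::printf("first claim: %d\n", t.try_claim(global_parity));   // 1
//     std::printf("second claim: %d\n", t.try_claim(global_parity));  // 0
//   }
//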
1795     task->record_end_time();
1796   }
1797
1798   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1799     AbstractGangTask("Par Remark"), _cm(cm) {
1800     _cm->terminator()->reset_for_reuse(active_workers);
1801   }
1802 };
1803
1804 void G1ConcurrentMark::finalize_marking() {
1805   ResourceMark rm;
1806   HandleMark hm;
1807
1808   _g1h->ensure_parsability(false);
1809
1810   // this is remark, so we'll use up all active threads
1811   uint active_workers = _g1h->workers()->active_workers();
1812   set_concurrency_and_phase(active_workers, false /* concurrent */);
1813   // Leave _parallel_marking_threads at its
1814   // value originally calculated in the G1ConcurrentMark
1815   // constructor and pass values of the active workers
1816   // through the gang in the task.
1817
1818   {
1819     StrongRootsScope srs(active_workers);
1820
1821     G1CMRemarkTask remarkTask(this, active_workers);
1822     // We will start all available threads, even if we decide that the
1823     // active_workers will be fewer. The extra ones will just bail out
1824     // immediately.
1825     _g1h->workers()->run_task(&remarkTask);
1826   }
1827
1828   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1829   guarantee(has_overflown() ||
1830             satb_mq_set.completed_buffers_num() == 0,
1831             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1832             BOOL_TO_STR(has_overflown()),
1833             satb_mq_set.completed_buffers_num());
1834
1835   print_stats();
1836 }
1837
1838 void G1ConcurrentMark::flush_all_task_caches() {
1839   size_t hits = 0;
1840   size_t misses = 0;
1841   for (uint i = 0; i < _max_num_tasks; i++) {
1842     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1843     hits += stats.first;
1844     misses += stats.second;
1845   }
1846   size_t sum = hits + misses;
1847   log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
1848                        hits, misses, percent_of(hits, sum));
1849 }
1850
1851 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1852   _prev_mark_bitmap->clear_range(mr);
1853 }
1854
1855 HeapRegion*
1856 G1ConcurrentMark::claim_region(uint worker_id) {
1857   // "checkpoint" the finger
1858   HeapWord* finger = _finger;
1859
1860   while (finger < _heap.end()) {
1861     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1862
1863     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1864     // Make sure that the reads below do not float before loading curr_region.
1865     OrderAccess::loadload();
1866     // Above, heap_region_containing() may return NULL as we always scan
1867     // and claim until the end of the heap. In this case, just jump to the next region.
1868     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1869
1870     // Is the gap between reading the finger and doing the CAS too long?
1871     HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
1872     if (res == finger && curr_region != NULL) {
1873       // we succeeded
1874       HeapWord* bottom = curr_region->bottom();
1875       HeapWord* limit = curr_region->next_top_at_mark_start();
1876
1877       // notice that _finger == end cannot be guaranteed here, since
1878       // someone else might have moved the finger even further
1879       assert(_finger >= end, "the finger should have moved forward");
1880
1881       if (limit > bottom) {
1882         return curr_region;
1883       } else {
1884         assert(limit == bottom,
1885                "the region limit should be at bottom");
1886         // we return NULL and the caller should try calling
1887         // claim_region() again.
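//
// claim_region() above hands out regions with a CAS on the global finger:
// read the finger, try to swing it to the claimed region's end, and retry
// on a lost race. A standalone sketch of the same idiom (illustration only,
// not HotSpot code; word indices stand in for HeapWord* and the region size
// is made up):
//
//   #include <atomic>
//   #include <cstdio>
//
//   const size_t kRegionWords = 1024;       // stand-in for HeapRegion::GrainWords
//   const size_t kHeapWords   = 8 * kRegionWords;
//
//   std::atomic<size_t> g_finger{0};        // global finger, in words
//
//   // Returns the start of a claimed region, or kHeapWords when out of
//   // regions. Losing the CAS just means another worker claimed that
//   // region; the finger is re-read and we retry.
//   size_t claim_region() {
//     size_t finger = g_finger.load();
//     while (finger < kHeapWords) {
//       size_t end = finger + kRegionWords;
//       if (g_finger.compare_exchange_weak(finger, end)) {
//         return finger;                    // we own [finger, end)
//       }
//       // on failure, compare_exchange_weak reloaded 'finger'; retry
//     }
//     return kHeapWords;
//   }
//
//   int main() {
//     size_t r;
//     while ((r = claim_region()) < kHeapWords) {
//       std::printf("claimed region starting at word %zu\n", r);
//     }
//   }
//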
1888 return NULL; 1889 } 1890 } else { 1891 assert(_finger > finger, "the finger should have moved forward"); 1892 // read it again 1893 finger = _finger; 1894 } 1895 } 1896 1897 return NULL; 1898 } 1899 1900 #ifndef PRODUCT 1901 class VerifyNoCSetOops { 1902 G1CollectedHeap* _g1h; 1903 const char* _phase; 1904 int _info; 1905 1906 public: 1907 VerifyNoCSetOops(const char* phase, int info = -1) : 1908 _g1h(G1CollectedHeap::heap()), 1909 _phase(phase), 1910 _info(info) 1911 { } 1912 1913 void operator()(G1TaskQueueEntry task_entry) const { 1914 if (task_entry.is_array_slice()) { 1915 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1916 return; 1917 } 1918 guarantee(oopDesc::is_oop(task_entry.obj()), 1919 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1920 p2i(task_entry.obj()), _phase, _info); 1921 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1922 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1923 p2i(task_entry.obj()), _phase, _info); 1924 } 1925 }; 1926 1927 void G1ConcurrentMark::verify_no_cset_oops() { 1928 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1929 if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) { 1930 return; 1931 } 1932 1933 // Verify entries on the global mark stack 1934 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 1935 1936 // Verify entries on the task queues 1937 for (uint i = 0; i < _max_num_tasks; ++i) { 1938 G1CMTaskQueue* queue = _task_queues->queue(i); 1939 queue->iterate(VerifyNoCSetOops("Queue", i)); 1940 } 1941 1942 // Verify the global finger 1943 HeapWord* global_finger = finger(); 1944 if (global_finger != NULL && global_finger < _heap.end()) { 1945 // Since we always iterate over all regions, we might get a NULL HeapRegion 1946 // here. 1947 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 1948 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 1949 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 1950 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 1951 } 1952 1953 // Verify the task fingers 1954 assert(_num_concurrent_workers <= _max_num_tasks, "sanity"); 1955 for (uint i = 0; i < _num_concurrent_workers; ++i) { 1956 G1CMTask* task = _tasks[i]; 1957 HeapWord* task_finger = task->finger(); 1958 if (task_finger != NULL && task_finger < _heap.end()) { 1959 // See above note on the global finger verification. 1960 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 1961 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 1962 !task_hr->in_collection_set(), 1963 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 1964 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 1965 } 1966 } 1967 } 1968 #endif // PRODUCT 1969 1970 void G1ConcurrentMark::rebuild_rem_set_concurrently() { 1971 _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset); 1972 } 1973 1974 void G1ConcurrentMark::print_stats() { 1975 if (!log_is_enabled(Debug, gc, stats)) { 1976 return; 1977 } 1978 log_debug(gc, stats)("---------------------------------------------------------------------"); 1979 for (size_t i = 0; i < _num_active_tasks; ++i) { 1980 _tasks[i]->print_stats(); 1981 log_debug(gc, stats)("---------------------------------------------------------------------"); 1982 } 1983 } 1984 1985 void G1ConcurrentMark::concurrent_cycle_abort() { 1986 if (!cm_thread()->during_cycle() || _has_aborted) { 1987 // We haven't started a concurrent cycle or we have already aborted it. 
No need to do anything. 1988 return; 1989 } 1990 1991 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 1992 // concurrent bitmap clearing. 1993 { 1994 GCTraceTime(Debug, gc) debug("Clear Next Bitmap"); 1995 clear_bitmap(_next_mark_bitmap, _g1h->workers(), false); 1996 } 1997 // Note we cannot clear the previous marking bitmap here 1998 // since VerifyDuringGC verifies the objects marked during 1999 // a full GC against the previous bitmap. 2000 2001 // Empty mark stack 2002 reset_marking_for_restart(); 2003 for (uint i = 0; i < _max_num_tasks; ++i) { 2004 _tasks[i]->clear_region_fields(); 2005 } 2006 _first_overflow_barrier_sync.abort(); 2007 _second_overflow_barrier_sync.abort(); 2008 _has_aborted = true; 2009 2010 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2011 satb_mq_set.abandon_partial_marking(); 2012 // This can be called either during or outside marking, we'll read 2013 // the expected_active value from the SATB queue set. 2014 satb_mq_set.set_active_all_threads( 2015 false, /* new active value */ 2016 satb_mq_set.is_active() /* expected_active */); 2017 } 2018 2019 static void print_ms_time_info(const char* prefix, const char* name, 2020 NumberSeq& ns) { 2021 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2022 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2023 if (ns.num() > 0) { 2024 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2025 prefix, ns.sd(), ns.maximum()); 2026 } 2027 } 2028 2029 void G1ConcurrentMark::print_summary_info() { 2030 Log(gc, marking) log; 2031 if (!log.is_trace()) { 2032 return; 2033 } 2034 2035 log.trace(" Concurrent marking:"); 2036 print_ms_time_info(" ", "init marks", _init_times); 2037 print_ms_time_info(" ", "remarks", _remark_times); 2038 { 2039 print_ms_time_info(" ", "final marks", _remark_mark_times); 2040 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2041 2042 } 2043 print_ms_time_info(" ", "cleanups", _cleanup_times); 2044 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2045 _total_cleanup_time, (_cleanup_times.num() > 0 ? 
_total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2046   log.trace("  Total stop_world time = %8.2f s.",
2047             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2048   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2049             cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
2050 }
2051
2052 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2053   _concurrent_workers->print_worker_threads_on(st);
2054 }
2055
2056 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2057   _concurrent_workers->threads_do(tc);
2058 }
2059
2060 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2061   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2062                p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
2063   _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
2064   _next_mark_bitmap->print_on_error(st, " Next Bits: ");
2065 }
2066
2067 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2068   ReferenceProcessor* result = g1h->ref_processor_cm();
2069   assert(result != NULL, "CM reference processor should not be NULL");
2070   return result;
2071 }
2072
2073 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2074                                G1CMTask* task)
2075   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2076     _g1h(g1h), _task(task)
2077 { }
2078
2079 void G1CMTask::setup_for_region(HeapRegion* hr) {
2080   assert(hr != NULL,
2081          "claim_region() should have filtered out NULL regions");
2082   _curr_region = hr;
2083   _finger = hr->bottom();
2084   update_region_limit();
2085 }
2086
2087 void G1CMTask::update_region_limit() {
2088   HeapRegion* hr = _curr_region;
2089   HeapWord* bottom = hr->bottom();
2090   HeapWord* limit = hr->next_top_at_mark_start();
2091
2092   if (limit == bottom) {
2093     // The region was collected underneath our feet.
2094     // We set the finger to bottom to ensure that the bitmap
2095     // iteration that will follow this will not do anything.
2096     // (this is not a condition that holds when we set the region up,
2097     // as the region is not supposed to be empty in the first place)
2098     _finger = bottom;
2099   } else if (limit >= _region_limit) {
2100     assert(limit >= _finger, "peace of mind");
2101   } else {
2102     assert(limit < _region_limit, "only way to get here");
2103     // This can happen under some pretty unusual circumstances. An
2104     // evacuation pause empties the region underneath our feet (NTAMS
2105     // at bottom). We then do some allocation in the region (NTAMS
2106     // stays at bottom), followed by the region being used as a GC
2107     // alloc region (NTAMS will move to top() and the objects
2108     // originally below it will be grayed). All objects now marked in
2109     // the region are explicitly grayed, if below the global finger,
2110     // and in fact we do not need to scan anything else. So, we simply
2111     // set _finger to limit to ensure that the bitmap iteration
2112     // doesn't do anything.
2113     _finger = limit;
2114   }
2115
2116   _region_limit = limit;
2117 }
2118
2119 void G1CMTask::giveup_current_region() {
2120   assert(_curr_region != NULL, "invariant");
2121   clear_region_fields();
2122 }
2123
2124 void G1CMTask::clear_region_fields() {
2125   // Values for these three fields that indicate that we're not
2126   // holding on to a region.
2127   _curr_region = NULL;
2128   _finger = NULL;
2129   _region_limit = NULL;
2130 }
2131
2132 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2133   if (cm_oop_closure == NULL) {
2134     assert(_cm_oop_closure != NULL, "invariant");
2135   } else {
2136     assert(_cm_oop_closure == NULL, "invariant");
2137   }
2138   _cm_oop_closure = cm_oop_closure;
2139 }
2140
2141 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2142   guarantee(next_mark_bitmap != NULL, "invariant");
2143   _next_mark_bitmap = next_mark_bitmap;
2144   clear_region_fields();
2145
2146   _calls = 0;
2147   _elapsed_time_ms = 0.0;
2148   _termination_time_ms = 0.0;
2149   _termination_start_time_ms = 0.0;
2150
2151   _mark_stats_cache.reset();
2152 }
2153
2154 bool G1CMTask::should_exit_termination() {
2155   regular_clock_call();
2156   // This is called when we are in the termination protocol. We should
2157   // quit if, for some reason, this task wants to abort or the global
2158   // stack is not empty (this means that we can get work from it).
2159   return !_cm->mark_stack_empty() || has_aborted();
2160 }
2161
2162 void G1CMTask::reached_limit() {
2163   assert(_words_scanned >= _words_scanned_limit ||
2164          _refs_reached >= _refs_reached_limit,
2165          "shouldn't have been called otherwise");
2166   regular_clock_call();
2167 }
2168
2169 void G1CMTask::regular_clock_call() {
2170   if (has_aborted()) {
2171     return;
2172   }
2173
2174   // First, we need to recalculate the words scanned and refs reached
2175   // limits for the next clock call.
2176   recalculate_limits();
2177
2178   // During the regular clock call we do the following:
2179
2180   // (1) If an overflow has been flagged, then we abort.
2181   if (_cm->has_overflown()) {
2182     set_has_aborted();
2183     return;
2184   }
2185
2186   // If we are not concurrent (i.e. we're doing remark) we don't need
2187   // to check anything else. The other steps are only needed during
2188   // the concurrent marking phase.
2189   if (!_cm->concurrent()) {
2190     return;
2191   }
2192
2193   // (2) If marking has been aborted for Full GC, then we also abort.
2194   if (_cm->has_aborted()) {
2195     set_has_aborted();
2196     return;
2197   }
2198
2199   double curr_time_ms = os::elapsedVTime() * 1000.0;
2200
2201   // (3) We check whether we should yield. If we have to, then we abort.
2202   if (SuspendibleThreadSet::should_yield()) {
2203     // We should yield. To do this we abort the task. The caller is
2204     // responsible for yielding.
2205     set_has_aborted();
2206     return;
2207   }
2208
2209   // (4) We check whether we've reached our time quota. If we have,
2210   // then we abort.
2211   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2212   if (elapsed_time_ms > _time_target_ms) {
2213     set_has_aborted();
2214     _has_timed_out = true;
2215     return;
2216   }
2217
2218   // (5) Finally, we check whether there are enough completed SATB
2219   // buffers available for processing. If there are, we abort.
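//
// The scheme above is a work-based clock: scanning only bumps cheap
// counters, and a real time check is paid only when a counter crosses its
// recalculated limit. A standalone sketch of that shape (illustration only,
// not HotSpot code; std::chrono stands in for os::elapsedVTime() and the
// numbers are made up):
//
//   #include <chrono>
//   #include <cstdio>
//
//   struct WorkClock {
//     size_t words_scanned = 0;
//     size_t words_limit = 0;
//     static const size_t kPeriod = 12 * 1024;  // scanning budget per check
//     std::chrono::steady_clock::time_point deadline;
//
//     void recalculate_limit() { words_limit = words_scanned + kPeriod; }
//
//     // Called per object: cheap counter bump, rare clock read.
//     bool scanned(size_t words) {
//       words_scanned += words;
//       if (words_scanned >= words_limit) {
//         recalculate_limit();
//         return std::chrono::steady_clock::now() >= deadline;  // abort?
//       }
//       return false;
//     }
//   };
//
//   int main() {
//     WorkClock clock;
//     clock.deadline = std::chrono::steady_clock::now() +
//                      std::chrono::milliseconds(10);
//     clock.recalculate_limit();
//     size_t objects = 0;
//     while (!clock.scanned(8)) {   // pretend every object is 8 words
//       objects++;
//     }
//     std::printf("aborted after scanning %zu objects\n", objects);
//   }
//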
2220   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2221   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2222     // We need to process SATB buffers, so we'll abort and restart
2223     // the marking task to do so.
2224     set_has_aborted();
2225     return;
2226   }
2227 }
2228
2229 void G1CMTask::recalculate_limits() {
2230   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2231   _words_scanned_limit = _real_words_scanned_limit;
2232
2233   _real_refs_reached_limit = _refs_reached + refs_reached_period;
2234   _refs_reached_limit = _real_refs_reached_limit;
2235 }
2236
2237 void G1CMTask::decrease_limits() {
2238   // This is called when we believe that we're going to do an infrequent
2239   // operation which will increase the per-byte scanned cost (i.e. move
2240   // entries to/from the global stack). It basically tries to decrease the
2241   // scanning limit so that the clock is called earlier.
2242
2243   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2244   _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2245 }
2246
2247 void G1CMTask::move_entries_to_global_stack() {
2248   // Local array where we'll store the entries that will be popped
2249   // from the local queue.
2250   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2251
2252   size_t n = 0;
2253   G1TaskQueueEntry task_entry;
2254   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2255     buffer[n] = task_entry;
2256     ++n;
2257   }
2258   if (n < G1CMMarkStack::EntriesPerChunk) {
2259     buffer[n] = G1TaskQueueEntry(); // Terminate a partially filled chunk; the consumer stops at the first null entry.
2260   }
2261
2262   if (n > 0) {
2263     if (!_cm->mark_stack_push(buffer)) {
2264       set_has_aborted();
2265     }
2266   }
2267
2268   // This operation was quite expensive, so decrease the limits.
2269   decrease_limits();
2270 }
2271
2272 bool G1CMTask::get_entries_from_global_stack() {
2273   // Local array where we'll store the entries that will be popped
2274   // from the global stack.
2275   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2276
2277   if (!_cm->mark_stack_pop(buffer)) {
2278     return false;
2279   }
2280
2281   // We did actually pop at least one entry.
2282   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2283     G1TaskQueueEntry task_entry = buffer[i];
2284     if (task_entry.is_null()) {
2285       break;
2286     }
2287     assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2288     bool success = _task_queue->push(task_entry);
2289     // We only call this when the local queue is empty or under a
2290     // given target limit. So, we do not expect this push to fail.
2291     assert(success, "invariant");
2292   }
2293
2294   // This operation was quite expensive, so decrease the limits.
2295   decrease_limits();
2296   return true;
2297 }
2298
2299 void G1CMTask::drain_local_queue(bool partially) {
2300   if (has_aborted()) {
2301     return;
2302   }
2303
2304   // Decide what the target size is, depending on whether we're going to
2305   // drain it partially (so that other tasks can steal if they run out
2306   // of things to do) or totally (at the very end).
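//
// The target size chosen below has two regimes: a partial drain stops while
// entries remain stealable by other workers, and a total drain (target 0)
// empties the queue at the very end of marking. A standalone sketch of the
// drain-to-target policy (illustration only, not HotSpot code; std::deque
// of ints stands in for the task queue of G1TaskQueueEntry):
//
//   #include <cstdio>
//   #include <deque>
//
//   // Pop and process entries only while the queue is above target_size,
//   // leaving the remainder available for other workers to steal.
//   void drain(std::deque<int>& queue, size_t target_size) {
//     while (queue.size() > target_size) {
//       int entry = queue.front();
//       queue.pop_front();
//       std::printf("scan entry %d\n", entry);  // scan_task_entry() stand-in
//     }
//   }
//
//   int main() {
//     std::deque<int> queue = {1, 2, 3, 4, 5, 6};
//     drain(queue, 2);   // partial: leave 2 entries for stealing
//     drain(queue, 0);   // total: used at the very end of marking
//   }
//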
2307   size_t target_size;
2308   if (partially) {
2309     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2310   } else {
2311     target_size = 0;
2312   }
2313
2314   if (_task_queue->size() > target_size) {
2315     G1TaskQueueEntry entry;
2316     bool ret = _task_queue->pop_local(entry);
2317     while (ret) {
2318       scan_task_entry(entry);
2319       if (_task_queue->size() <= target_size || has_aborted()) {
2320         ret = false;
2321       } else {
2322         ret = _task_queue->pop_local(entry);
2323       }
2324     }
2325   }
2326 }
2327
2328 void G1CMTask::drain_global_stack(bool partially) {
2329   if (has_aborted()) {
2330     return;
2331   }
2332
2333   // We have a policy to drain the local queue before we attempt to
2334   // drain the global stack.
2335   assert(partially || _task_queue->size() == 0, "invariant");
2336
2337   // Decide what the target size is, depending on whether we're going to
2338   // drain it partially (so that other tasks can steal if they run out
2339   // of things to do) or totally (at the very end).
2340   // Notice that when draining the global mark stack partially, due to the raciness
2341   // of the mark stack size update we might in fact drop below the target. But,
2342   // this is not a problem.
2343   // In case of total draining, we simply process until the global mark stack is
2344   // totally empty, disregarding the size counter.
2345   if (partially) {
2346     size_t const target_size = _cm->partial_mark_stack_size_target();
2347     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2348       if (get_entries_from_global_stack()) {
2349         drain_local_queue(partially);
2350       }
2351     }
2352   } else {
2353     while (!has_aborted() && get_entries_from_global_stack()) {
2354       drain_local_queue(partially);
2355     }
2356   }
2357 }
2358
2359 // The SATB queue has several assumptions on whether to call the par or
2360 // non-par versions of the methods. This is why some of the code is
2361 // replicated. We should really get rid of the single-threaded version
2362 // of the code to simplify things.
2363 void G1CMTask::drain_satb_buffers() {
2364   if (has_aborted()) {
2365     return;
2366   }
2367
2368   // We set this so that the regular clock knows that we're in the
2369   // middle of draining buffers and doesn't set the abort flag when it
2370   // notices that SATB buffers are available for draining. It'd be
2371   // very counterproductive if it did that. :-)
2372   _draining_satb_buffers = true;
2373
2374   G1CMSATBBufferClosure satb_cl(this, _g1h);
2375   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2376
2377   // This keeps claiming and applying the closure to completed buffers
2378   // until we run out of buffers or we need to abort.
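//
// The buffers drained here are filled by the mutators' SATB write barrier,
// which logs the old value of a reference field before it is overwritten so
// that marking still sees the snapshot-at-the-beginning of the object graph.
// A conceptual sketch of such a pre-write barrier (illustration only, not
// HotSpot code; a single global vector stands in for the per-thread queues
// and completed-buffer set):
//
//   #include <cstdio>
//   #include <vector>
//
//   struct Object { Object* field = nullptr; };
//
//   std::vector<Object*> g_satb_buffer;   // stand-in for one SATB buffer
//
//   void satb_write(Object* obj, Object* new_value, bool marking_active) {
//     if (marking_active && obj->field != nullptr) {
//       g_satb_buffer.push_back(obj->field);  // pre-barrier: log old value
//     }
//     obj->field = new_value;                 // the actual store
//   }
//
//   int main() {
//     Object a, b, c;
//     a.field = &b;
//     satb_write(&a, &c, /* marking_active */ true);  // b is logged, not lost
//     std::printf("logged %zu old value(s)\n", g_satb_buffer.size());
//   }
//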
2379   while (!has_aborted() &&
2380          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2381     regular_clock_call();
2382   }
2383
2384   _draining_satb_buffers = false;
2385
2386   assert(has_aborted() ||
2387          _cm->concurrent() ||
2388          satb_mq_set.completed_buffers_num() == 0, "invariant");
2389
2390   // Again, this was a potentially expensive operation, so decrease the
2391   // limits to get the regular clock call early.
2392   decrease_limits();
2393 }
2394
2395 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2396   _mark_stats_cache.reset(region_idx);
2397 }
2398
2399 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2400   return _mark_stats_cache.evict_all();
2401 }
2402
2403 void G1CMTask::print_stats() {
2404   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2405   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2406                        _elapsed_time_ms, _termination_time_ms);
2407   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2408                        _step_times_ms.num(),
2409                        _step_times_ms.avg(),
2410                        _step_times_ms.sd(),
2411                        _step_times_ms.maximum(),
2412                        _step_times_ms.sum());
2413   size_t const hits = _mark_stats_cache.hits();
2414   size_t const misses = _mark_stats_cache.misses();
2415   log_debug(gc, stats)("  Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f",
2416                        hits, misses, percent_of(hits, hits + misses));
2417 }
2418
2419 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2420   return _task_queues->steal(worker_id, hash_seed, task_entry);
2421 }
2422
2423 /*****************************************************************************
2424
2425     The do_marking_step(time_target_ms, ...) method is the building
2426     block of the parallel marking framework. It can be called in parallel
2427     with other invocations of do_marking_step() on different tasks
2428     (but only one per task, obviously) and concurrently with the
2429     mutator threads, or during remark, hence it eliminates the need
2430     for two versions of the code. When called during remark, it will
2431     pick up from where the task left off during the concurrent marking
2432     phase. Interestingly, tasks are also claimable during evacuation
2433     pauses, since do_marking_step() ensures that it aborts before
2434     it needs to yield.
2435
2436     The data structures that it uses to do marking work are the
2437     following:
2438
2439     (1) Marking Bitmap. If there are gray objects that appear only
2440     on the bitmap (this happens either when dealing with an overflow
2441     or when the initial marking phase has simply marked the roots
2442     and didn't push them on the stack), then tasks claim heap
2443     regions whose bitmap they then scan to find gray objects. A
2444     global finger indicates where the end of the last claimed region
2445     is. A local finger indicates how far into the region a task has
2446     scanned. The two fingers are used to determine how to gray an
2447     object (i.e. whether simply marking it is OK, as it will be
2448     visited by a task in the future, or whether it also needs to be
2449     pushed on a stack).
2450
2451     (2) Local Queue. The task's local queue, which is accessed
2452     reasonably efficiently by the task. Other tasks can steal from
2453     it when they run out of work. Throughout the marking phase, a
2454     task attempts to keep its local queue short but not totally
2455     empty, so that entries are available for stealing by other
2456     tasks.
Only when there is no more work will a task totally
2457     drain its local queue.
2458
2459     (3) Global Mark Stack. This handles local queue overflow. During
2460     marking only sets of entries are moved between it and the local
2461     queues, as access to it requires a mutex and more fine-grained
2462     interaction with it which might cause contention. If it
2463     overflows, then the marking phase should restart and iterate
2464     over the bitmap to identify gray objects. Throughout the marking
2465     phase, tasks attempt to keep the global mark stack at a small
2466     length but not totally empty, so that entries are available for
2467     popping by other tasks. Only when there is no more work will
2468     tasks totally drain the global mark stack.
2469
2470     (4) SATB Buffer Queue. This is where completed SATB buffers are
2471     made available. Buffers are regularly removed from this queue
2472     and scanned for roots, so that the queue doesn't get too
2473     long. During remark, all completed buffers are processed, as
2474     well as the filled-in parts of any uncompleted buffers.
2475
2476     The do_marking_step() method tries to abort when the time target
2477     has been reached. There are a few other cases when the
2478     do_marking_step() method also aborts:
2479
2480     (1) When the marking phase has been aborted (after a Full GC).
2481
2482     (2) When a global overflow (on the global stack) has been
2483     triggered. Before the task aborts, it will actually sync up with
2484     the other tasks to ensure that all the marking data structures
2485     (local queues, stacks, fingers etc.) are re-initialized so that
2486     when do_marking_step() completes, the marking phase can
2487     immediately restart.
2488
2489     (3) When enough completed SATB buffers are available. The
2490     do_marking_step() method only tries to drain SATB buffers right
2491     at the beginning. So, if enough buffers are available, the
2492     marking step aborts and the SATB buffers are processed at
2493     the beginning of the next invocation.
2494
2495     (4) To yield. When we have to yield, we abort and do the yield
2496     right at the end of do_marking_step(). This saves us from a lot
2497     of hassle as, by yielding, we might allow a Full GC. If this
2498     happens then objects will be compacted underneath our feet, the
2499     heap might shrink, etc. We save checking for this by just
2500     aborting and doing the yield right at the end.
2501
2502     From the above it follows that the do_marking_step() method should
2503     be called in a loop (or, otherwise, regularly) until it completes.
2504
2505     If a marking step completes without its has_aborted() flag being
2506     true, it means it has completed the current marking phase (and
2507     also all other marking tasks have done so and have all synced up).
2508
2509     A method called regular_clock_call() is invoked "regularly" (at
2510     sub-millisecond intervals) throughout marking. It is this clock method that
2511     checks all the abort conditions which were mentioned above and
2512     decides when the task should abort. A work-based scheme is used to
2513     trigger this clock method: when the number of object words the
2514     marking phase has scanned or the number of references the marking
2515     phase has visited reach a given limit. Additional invocations of
2516     the clock method have been planted in a few other strategic places
2517     too. The initial reason for the clock method was to avoid calling
2518     vtime too regularly, as it is quite expensive.
So, once it was in 2519 place, it was natural to piggy-back all the other conditions on it 2520 too and not constantly check them throughout the code. 2521 2522 If do_termination is true then do_marking_step will enter its 2523 termination protocol. 2524 2525 The value of is_serial must be true when do_marking_step is being 2526 called serially (i.e. by the VMThread) and do_marking_step should 2527 skip any synchronization in the termination and overflow code. 2528 Examples include the serial remark code and the serial reference 2529 processing closures. 2530 2531 The value of is_serial must be false when do_marking_step is 2532 being called by any of the worker threads in a work gang. 2533 Examples include the concurrent marking code (CMMarkingTask), 2534 the MT remark code, and the MT reference processing closures. 2535 2536 *****************************************************************************/ 2537 2538 void G1CMTask::do_marking_step(double time_target_ms, 2539 bool do_termination, 2540 bool is_serial) { 2541 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2542 2543 _start_time_ms = os::elapsedVTime() * 1000.0; 2544 2545 // If do_stealing is true then do_marking_step will attempt to 2546 // steal work from the other G1CMTasks. It only makes sense to 2547 // enable stealing when the termination protocol is enabled 2548 // and do_marking_step() is not being called serially. 2549 bool do_stealing = do_termination && !is_serial; 2550 2551 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2552 _time_target_ms = time_target_ms - diff_prediction_ms; 2553 2554 // set up the variables that are used in the work-based scheme to 2555 // call the regular clock method 2556 _words_scanned = 0; 2557 _refs_reached = 0; 2558 recalculate_limits(); 2559 2560 // clear all flags 2561 clear_has_aborted(); 2562 _has_timed_out = false; 2563 _draining_satb_buffers = false; 2564 2565 ++_calls; 2566 2567 // Set up the bitmap and oop closures. Anything that uses them is 2568 // eventually called from this method, so it is OK to allocate these 2569 // statically. 2570 G1CMBitMapClosure bitmap_closure(this, _cm); 2571 G1CMOopClosure cm_oop_closure(_g1h, this); 2572 set_cm_oop_closure(&cm_oop_closure); 2573 2574 if (_cm->has_overflown()) { 2575 // This can happen if the mark stack overflows during a GC pause 2576 // and this task, after a yield point, restarts. We have to abort 2577 // as we need to get into the overflow protocol which happens 2578 // right at the end of this task. 2579 set_has_aborted(); 2580 } 2581 2582 // First drain any available SATB buffers. After this, we will not 2583 // look at SATB buffers before the next invocation of this method. 2584 // If enough completed SATB buffers are queued up, the regular clock 2585 // will abort this task so that it restarts. 2586 drain_satb_buffers(); 2587 // ...then partially drain the local queue and the global stack 2588 drain_local_queue(true); 2589 drain_global_stack(true); 2590 2591 do { 2592 if (!has_aborted() && _curr_region != NULL) { 2593 // This means that we're already holding on to a region. 2594 assert(_finger != NULL, "if region is not NULL, then the finger " 2595 "should not be NULL either"); 2596 2597 // We might have restarted this task after an evacuation pause 2598 // which might have evacuated the region we're holding on to 2599 // underneath our feet. 
Let's read its limit again to make sure
2600       // that we do not iterate over a region of the heap that
2601       // contains garbage (update_region_limit() will also move
2602       // _finger to the start of the region if it is found empty).
2603       update_region_limit();
2604       // We will start from _finger not from the start of the region,
2605       // as we might be restarting this task after aborting half-way
2606       // through scanning this region. In this case, _finger points to
2607       // the address where we last found a marked object. If this is a
2608       // fresh region, _finger points to start().
2609       MemRegion mr = MemRegion(_finger, _region_limit);
2610
2611       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2612              "humongous regions should go around loop once only");
2613
2614       // Some special cases:
2615       // If the memory region is empty, we can just give up the region.
2616       // If the current region is humongous then we only need to check
2617       // the bitmap for the bit associated with the start of the object,
2618       // scan the object if it's live, and give up the region.
2619       // Otherwise, let's iterate over the bitmap of the part of the region
2620       // that is left.
2621       // If the iteration is successful, give up the region.
2622       if (mr.is_empty()) {
2623         giveup_current_region();
2624         regular_clock_call();
2625       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2626         if (_next_mark_bitmap->is_marked(mr.start())) {
2627           // The object is marked - apply the closure
2628           bitmap_closure.do_addr(mr.start());
2629         }
2630         // Even if this task aborted while scanning the humongous object
2631         // we can (and should) give up the current region.
2632         giveup_current_region();
2633         regular_clock_call();
2634       } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
2635         giveup_current_region();
2636         regular_clock_call();
2637       } else {
2638         assert(has_aborted(), "currently the only way to do so");
2639         // The only way to abort the bitmap iteration is to return
2640         // false from the do_addr() method. However, inside the
2641         // do_addr() method we move the _finger to point to the
2642         // object currently being looked at. So, if we bail out, we
2643         // have definitely set _finger to something non-null.
2644         assert(_finger != NULL, "invariant");
2645
2646         // Region iteration was actually aborted. So now _finger
2647         // points to the address of the object we last scanned. If we
2648         // leave it there, when we restart this task, we will rescan
2649         // the object. It is easy to avoid this. We move the finger by
2650         // enough to point to the next possible object header.
2651         assert(_finger < _region_limit, "invariant");
2652         HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2653         // Check if bitmap iteration was aborted while scanning the last object
2654         if (new_finger >= _region_limit) {
2655           giveup_current_region();
2656         } else {
2657           move_finger_to(new_finger);
2658         }
2659       }
2660     }
2661     // At this point we have either completed iterating over the
2662     // region we were holding on to, or we have aborted.
2663
2664     // We then partially drain the local queue and the global stack.
2665     // (Do we really need this?)
2666     drain_local_queue(true);
2667     drain_global_stack(true);
2668
2669     // Read the note on the claim_region() method on why it might
2670     // return NULL with potentially more regions available for
2671     // claiming and why we have to check out_of_regions() to determine
2672     // whether we're done or not.
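//
// The abort path above (new_finger = _finger + object size) is what makes
// an aborted step restartable without rescanning: the finger is bumped past
// the object we just handled, unless that crosses the region limit, in which
// case the region is finished and given up. A standalone sketch of that
// bookkeeping (illustration only, not HotSpot code; plain word indices
// stand in for HeapWord*):
//
//   #include <cstdio>
//
//   typedef unsigned long HeapWordIdx;
//
//   // Returns false when the bump crosses the limit (region done),
//   // true when the finger now points at the next object to resume from.
//   bool resume_point_past(HeapWordIdx& finger, HeapWordIdx region_limit,
//                          HeapWordIdx obj_size_words) {
//     HeapWordIdx new_finger = finger + obj_size_words;
//     if (new_finger >= region_limit) {
//       return false;          // give up the region
//     }
//     finger = new_finger;     // restart scanning from the next object
//     return true;
//   }
//
//   int main() {
//     HeapWordIdx finger = 100;
//     bool more = resume_point_past(finger, /* region_limit */ 128,
//                                   /* obj_size_words */ 16);
//     std::printf("more=%d finger=%lu\n", more, finger);
//   }
//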
2673     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2674       // We are going to try to claim a new region. We should have
2675       // given up on the previous one.
2676       // Separated the asserts so that we know which one fires.
2677       assert(_curr_region == NULL, "invariant");
2678       assert(_finger == NULL, "invariant");
2679       assert(_region_limit == NULL, "invariant");
2680       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2681       if (claimed_region != NULL) {
2682         // Yes, we managed to claim one
2683         setup_for_region(claimed_region);
2684         assert(_curr_region == claimed_region, "invariant");
2685       }
2686       // It is important to call the regular clock here. It might take
2687       // a while to claim a region if, for example, we hit a large
2688       // block of empty regions. So we need to call the regular clock
2689       // method once around the loop to make sure it's called
2690       // frequently enough.
2691       regular_clock_call();
2692     }
2693
2694     if (!has_aborted() && _curr_region == NULL) {
2695       assert(_cm->out_of_regions(),
2696              "at this point we should be out of regions");
2697     }
2698   } while (_curr_region != NULL && !has_aborted());
2699
2700   if (!has_aborted()) {
2701     // We cannot check whether the global stack is empty, since other
2702     // tasks might be pushing objects to it concurrently.
2703     assert(_cm->out_of_regions(),
2704            "at this point we should be out of regions");
2705     // Try to reduce the number of available SATB buffers so that
2706     // remark has less work to do.
2707     drain_satb_buffers();
2708   }
2709
2710   // Since we've done everything else, we can now totally drain the
2711   // local queue and global stack.
2712   drain_local_queue(false);
2713   drain_global_stack(false);
2714
2715   // Attempt work stealing from other tasks' queues.
2716   if (do_stealing && !has_aborted()) {
2717     // We have not aborted. This means that we have finished all that
2718     // we could. Let's try to do some stealing...
2719
2720     // We cannot check whether the global stack is empty, since other
2721     // tasks might be pushing objects to it concurrently.
2722     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2723            "only way to reach here");
2724     while (!has_aborted()) {
2725       G1TaskQueueEntry entry;
2726       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2727         scan_task_entry(entry);
2728
2729         // And since we're towards the end, let's totally drain the
2730         // local queue and global stack.
2731         drain_local_queue(false);
2732         drain_global_stack(false);
2733       } else {
2734         break;
2735       }
2736     }
2737   }
2738
2739   // We still haven't aborted. Now, let's try to get into the
2740   // termination protocol.
2741   if (do_termination && !has_aborted()) {
2742     // We cannot check whether the global stack is empty, since other
2743     // tasks might be concurrently pushing objects on it.
2744     // Separated the asserts so that we know which one fires.
2745     assert(_cm->out_of_regions(), "only way to reach here");
2746     assert(_task_queue->size() == 0, "only way to reach here");
2747     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2748
2749     // The G1CMTask class also extends the TerminatorTerminator class,
2750     // hence its should_exit_termination() method will also decide
2751     // whether to exit the termination protocol or not.
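//
// offer_termination() below is cooperative termination detection: an idle
// worker registers its offer and waits, leaving either because every worker
// has offered (marking is globally done) or because should_exit_termination()
// reports new work or an abort, in which case the offer is withdrawn. A
// minimal sketch of that shape (illustration only, not HotSpot code; a
// single atomic counter stands in for ParallelTaskTerminator):
//
//   #include <atomic>
//   #include <cstdio>
//
//   std::atomic<unsigned> g_offered{0};
//   const unsigned kWorkers = 4;
//
//   bool offer_termination(bool (*should_exit_early)()) {
//     g_offered.fetch_add(1);
//     for (;;) {
//       if (g_offered.load() == kWorkers) {
//         return true;                // everyone is idle: terminate
//       }
//       if (should_exit_early()) {    // e.g. global stack non-empty, abort
//         g_offered.fetch_sub(1);
//         return false;               // go back and look for more work
//       }
//     }
//   }
//
//   int main() {
//     // Single-threaded demo: pretend work reappeared immediately.
//     bool finished = offer_termination([] { return true; });
//     std::printf("finished=%d\n", finished);
//   }
//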
2752     bool finished = (is_serial ||
2753                      _cm->terminator()->offer_termination(this));
2754     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2755     _termination_time_ms +=
2756       termination_end_time_ms - _termination_start_time_ms;
2757
2758     if (finished) {
2759       // We're all done.
2760
2761       // We can now guarantee that the global stack is empty, since
2762       // all other tasks have finished. We separated the guarantees so
2763       // that, if a condition is false, we can immediately find out
2764       // which one.
2765       guarantee(_cm->out_of_regions(), "only way to reach here");
2766       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2767       guarantee(_task_queue->size() == 0, "only way to reach here");
2768       guarantee(!_cm->has_overflown(), "only way to reach here");
2769     } else {
2770       // Apparently there's more work to do. Let's abort this task. The
2771       // caller will restart it and we can hopefully find more things to do.
2772       set_has_aborted();
2773     }
2774   }
2775
2776   // Mainly for debugging purposes to make sure that a pointer to the
2777   // closure which was statically allocated in this frame doesn't
2778   // escape it by accident.
2779   set_cm_oop_closure(NULL);
2780   double end_time_ms = os::elapsedVTime() * 1000.0;
2781   double elapsed_time_ms = end_time_ms - _start_time_ms;
2782   // Update the step history.
2783   _step_times_ms.add(elapsed_time_ms);
2784
2785   if (has_aborted()) {
2786     // The task was aborted for some reason.
2787     if (_has_timed_out) {
2788       double diff_ms = elapsed_time_ms - _time_target_ms;
2789       // Keep statistics of how well we did with respect to hitting
2790       // our target only if we actually timed out (if we aborted for
2791       // other reasons, then the results might get skewed).
2792       _marking_step_diffs_ms.add(diff_ms);
2793     }
2794
2795     if (_cm->has_overflown()) {
2796       // This is the interesting one. We aborted because a global
2797       // overflow was raised. This means we have to restart the
2798       // marking phase and start iterating over regions. However, in
2799       // order to do this we have to make sure that all tasks stop
2800       // what they are doing and re-initialize in a safe manner. We
2801       // will achieve this with the use of two barrier sync points.
2802
2803       if (!is_serial) {
2804         // We only need to enter the sync barrier if being called
2805         // from a parallel context
2806         _cm->enter_first_sync_barrier(_worker_id);
2807
2808         // When we exit this sync barrier we know that all tasks have
2809         // stopped doing marking work. So, it's now safe to
2810         // re-initialize our data structures.
2811       }
2812
2813       clear_region_fields();
2814       flush_mark_stats_cache();
2815
2816       if (!is_serial) {
2817         // If we're executing the concurrent phase of marking, reset the marking
2818         // state; otherwise the marking state is reset after reference processing,
2819         // during the remark pause.
2820         // If we reset here as a result of an overflow during the remark we will
2821         // see assertion failures from any subsequent set_concurrency_and_phase()
2822         // calls.
2823         if (_cm->concurrent() && _worker_id == 0) {
2824           // Worker 0 is responsible for clearing the global data structures because
2825           // of an overflow. During STW we should not clear the overflow flag (in
2826           // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2827           // this method to abort the pause and restart concurrent marking.
2828           _cm->reset_marking_for_restart();
2829
2830           log_info(gc, marking)("Concurrent Mark reset for overflow");
2831         }
2832
2833         // ...and enter the second barrier.
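//
// The two sync barriers used here form a classic two-phase reset: after the
// first barrier every task has stopped marking, so one designated worker can
// safely reset the global state; the second barrier keeps the others from
// restarting until that reset is complete. A standalone sketch using C++20
// std::barrier (illustration only, not HotSpot code):
//
//   #include <barrier>
//   #include <cstdio>
//   #include <thread>
//   #include <vector>
//
//   const unsigned kWorkers = 4;
//   std::barrier first_sync(kWorkers);
//   std::barrier second_sync(kWorkers);
//
//   void on_overflow(unsigned worker_id) {
//     first_sync.arrive_and_wait();    // everyone has stopped marking work
//     if (worker_id == 0) {
//       std::printf("worker 0 resets global marking state\n");
//     }
//     second_sync.arrive_and_wait();   // now it is safe to restart marking
//   }
//
//   int main() {
//     std::vector<std::thread> gang;
//     for (unsigned i = 0; i < kWorkers; i++) {
//       gang.emplace_back(on_overflow, i);
//     }
//     for (std::thread& t : gang) t.join();
//   }
//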
2834 _cm->enter_second_sync_barrier(_worker_id); 2835 } 2836 // At this point, if we're during the concurrent phase of 2837 // marking, everything has been re-initialized and we're 2838 // ready to restart. 2839 } 2840 } 2841 } 2842 2843 G1CMTask::G1CMTask(uint worker_id, 2844 G1ConcurrentMark* cm, 2845 G1CMTaskQueue* task_queue, 2846 G1RegionMarkStats* mark_stats, 2847 uint max_regions) : 2848 _objArray_processor(this), 2849 _worker_id(worker_id), 2850 _g1h(G1CollectedHeap::heap()), 2851 _cm(cm), 2852 _next_mark_bitmap(NULL), 2853 _task_queue(task_queue), 2854 _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize), 2855 _calls(0), 2856 _time_target_ms(0.0), 2857 _start_time_ms(0.0), 2858 _cm_oop_closure(NULL), 2859 _curr_region(NULL), 2860 _finger(NULL), 2861 _region_limit(NULL), 2862 _words_scanned(0), 2863 _words_scanned_limit(0), 2864 _real_words_scanned_limit(0), 2865 _refs_reached(0), 2866 _refs_reached_limit(0), 2867 _real_refs_reached_limit(0), 2868 _hash_seed(17), 2869 _has_aborted(false), 2870 _has_timed_out(false), 2871 _draining_satb_buffers(false), 2872 _step_times_ms(), 2873 _elapsed_time_ms(0.0), 2874 _termination_time_ms(0.0), 2875 _termination_start_time_ms(0.0), 2876 _marking_step_diffs_ms() 2877 { 2878 guarantee(task_queue != NULL, "invariant"); 2879 2880 _marking_step_diffs_ms.add(0.5); 2881 } 2882 2883 // These are formatting macros that are used below to ensure 2884 // consistent formatting. The *_H_* versions are used to format the 2885 // header for a particular value and they should be kept consistent 2886 // with the corresponding macro. Also note that most of the macros add 2887 // the necessary white space (as a prefix) which makes them a bit 2888 // easier to compose. 2889 2890 // All the output lines are prefixed with this string to be able to 2891 // identify them easily in a large log file. 2892 #define G1PPRL_LINE_PREFIX "###" 2893 2894 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 2895 #ifdef _LP64 2896 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 2897 #else // _LP64 2898 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 2899 #endif // _LP64 2900 2901 // For per-region info 2902 #define G1PPRL_TYPE_FORMAT " %-4s" 2903 #define G1PPRL_TYPE_H_FORMAT " %4s" 2904 #define G1PPRL_STATE_FORMAT " %-5s" 2905 #define G1PPRL_STATE_H_FORMAT " %5s" 2906 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 2907 #define G1PPRL_BYTE_H_FORMAT " %9s" 2908 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 2909 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 2910 2911 // For summary info 2912 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 2913 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 2914 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 2915 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 2916 2917 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) : 2918 _total_used_bytes(0), _total_capacity_bytes(0), 2919 _total_prev_live_bytes(0), _total_next_live_bytes(0), 2920 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) 2921 { 2922 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2923 MemRegion g1_reserved = g1h->g1_reserved(); 2924 double now = os::elapsedTime(); 2925 2926 // Print the header of the output. 
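//
// The G1PPRL_* macros above compose one printf-style format string through
// adjacent string literal concatenation: the preprocessor pastes the pieces
// together at compile time, which keeps every column's row format and its
// header format in step. A tiny standalone demonstration (illustration only;
// the DEMO_* names are made up):
//
//   #include <cstdio>
//
//   #define DEMO_PREFIX      "###"
//   #define DEMO_TYPE_FORMAT " %-4s"
//   #define DEMO_BYTE_FORMAT " %9zu"
//
//   int main() {
//     //          "###" " %-4s" " %9zu"  ==>  "### %-4s %9zu"
//     std::printf(DEMO_PREFIX DEMO_TYPE_FORMAT DEMO_BYTE_FORMAT "\n",
//                 "eden", (size_t)1048576);
//   }
//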
2927 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 2928 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 2929 G1PPRL_SUM_ADDR_FORMAT("reserved") 2930 G1PPRL_SUM_BYTE_FORMAT("region-size"), 2931 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 2932 HeapRegion::GrainBytes); 2933 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2934 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2935 G1PPRL_TYPE_H_FORMAT 2936 G1PPRL_ADDR_BASE_H_FORMAT 2937 G1PPRL_BYTE_H_FORMAT 2938 G1PPRL_BYTE_H_FORMAT 2939 G1PPRL_BYTE_H_FORMAT 2940 G1PPRL_DOUBLE_H_FORMAT 2941 G1PPRL_BYTE_H_FORMAT 2942 G1PPRL_STATE_H_FORMAT 2943 G1PPRL_BYTE_H_FORMAT, 2944 "type", "address-range", 2945 "used", "prev-live", "next-live", "gc-eff", 2946 "remset", "state", "code-roots"); 2947 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2948 G1PPRL_TYPE_H_FORMAT 2949 G1PPRL_ADDR_BASE_H_FORMAT 2950 G1PPRL_BYTE_H_FORMAT 2951 G1PPRL_BYTE_H_FORMAT 2952 G1PPRL_BYTE_H_FORMAT 2953 G1PPRL_DOUBLE_H_FORMAT 2954 G1PPRL_BYTE_H_FORMAT 2955 G1PPRL_STATE_H_FORMAT 2956 G1PPRL_BYTE_H_FORMAT, 2957 "", "", 2958 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 2959 "(bytes)", "", "(bytes)"); 2960 } 2961 2962 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) { 2963 const char* type = r->get_type_str(); 2964 HeapWord* bottom = r->bottom(); 2965 HeapWord* end = r->end(); 2966 size_t capacity_bytes = r->capacity(); 2967 size_t used_bytes = r->used(); 2968 size_t prev_live_bytes = r->live_bytes(); 2969 size_t next_live_bytes = r->next_live_bytes(); 2970 double gc_eff = r->gc_efficiency(); 2971 size_t remset_bytes = r->rem_set()->mem_size(); 2972 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 2973 const char* remset_type = r->rem_set()->get_short_state_str(); 2974 2975 _total_used_bytes += used_bytes; 2976 _total_capacity_bytes += capacity_bytes; 2977 _total_prev_live_bytes += prev_live_bytes; 2978 _total_next_live_bytes += next_live_bytes; 2979 _total_remset_bytes += remset_bytes; 2980 _total_strong_code_roots_bytes += strong_code_roots_bytes; 2981 2982 // Print a line for this particular region. 2983 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2984 G1PPRL_TYPE_FORMAT 2985 G1PPRL_ADDR_BASE_FORMAT 2986 G1PPRL_BYTE_FORMAT 2987 G1PPRL_BYTE_FORMAT 2988 G1PPRL_BYTE_FORMAT 2989 G1PPRL_DOUBLE_FORMAT 2990 G1PPRL_BYTE_FORMAT 2991 G1PPRL_STATE_FORMAT 2992 G1PPRL_BYTE_FORMAT, 2993 type, p2i(bottom), p2i(end), 2994 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 2995 remset_bytes, remset_type, strong_code_roots_bytes); 2996 2997 return false; 2998 } 2999 3000 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 3001 // add static memory usages to remembered set sizes 3002 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3003 // Print the footer of the output. 
3004 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3005 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3006 " SUMMARY" 3007 G1PPRL_SUM_MB_FORMAT("capacity") 3008 G1PPRL_SUM_MB_PERC_FORMAT("used") 3009 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3010 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3011 G1PPRL_SUM_MB_FORMAT("remset") 3012 G1PPRL_SUM_MB_FORMAT("code-roots"), 3013 bytes_to_mb(_total_capacity_bytes), 3014 bytes_to_mb(_total_used_bytes), 3015 percent_of(_total_used_bytes, _total_capacity_bytes), 3016 bytes_to_mb(_total_prev_live_bytes), 3017 percent_of(_total_prev_live_bytes, _total_capacity_bytes), 3018 bytes_to_mb(_total_next_live_bytes), 3019 percent_of(_total_next_live_bytes, _total_capacity_bytes), 3020 bytes_to_mb(_total_remset_bytes), 3021 bytes_to_mb(_total_strong_code_roots_bytes)); 3022 }
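//
// G1PrintRegionLivenessInfoClosure is an RAII-style reporter: the
// constructor emits the header, each do_heap_region() call prints one row
// while accumulating running totals, and the destructor emits the summary
// footer, so a caller only has to declare it and iterate. A standalone
// sketch of the same pattern (illustration only, not HotSpot code; Region
// and its fields are made up):
//
//   #include <cstdio>
//   #include <vector>
//
//   struct Region { size_t used; size_t capacity; };
//
//   struct LivenessPrinter {
//     size_t total_used = 0, total_capacity = 0;
//     LivenessPrinter() { std::printf("### used/capacity per region\n"); }
//     bool do_heap_region(const Region& r) {
//       total_used += r.used;
//       total_capacity += r.capacity;
//       std::printf("### %9zu %9zu\n", r.used, r.capacity);
//       return false;                 // false == keep iterating
//     }
//     ~LivenessPrinter() {
//       std::printf("### SUMMARY used %zu / capacity %zu\n",
//                   total_used, total_capacity);
//     }
//   };
//
//   int main() {
//     std::vector<Region> heap = {{10, 32}, {20, 32}, {5, 32}};
//     LivenessPrinter printer;
//     for (const Region& r : heap) printer.do_heap_region(r);
//   }  // summary footer prints here
//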