/*
 * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/shared/adaptiveSizePolicy.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "gc/shared/weakProcessor.hpp"
#include "include/jvm.h"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/access.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
#include "utilities/growableArray.hpp"

bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
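  // Advancing the local finger records that all bitmap bits below addr have
  // been inspected by this task; the asserts above guarantee that addr lies
  // between the task's local finger and the global finger.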
  _task->move_finger_to(addr);

  _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();

  return true;
}

size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}

bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}

void G1CMMarkStack::expand() {
  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                    old_capacity, new_capacity);
  }
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }
}

void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
  elem->next = *list;
  *list = elem;
}

void
G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
  TaskQueueEntryChunk* result = *list;
  if (result != NULL) {
    *list = (*list)->next;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != NULL) {
    _chunks_in_chunk_list--;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
  // wraparound of _hwm.
  if (_hwm >= _chunk_capacity) {
    return NULL;
  }

  size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
  if (cur_idx >= _chunk_capacity) {
    return NULL;
  }

  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
  result->next = NULL;
  return result;
}

bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == NULL) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}

G1CMRootRegions::G1CMRootRegions() :
  _survivors(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}

void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
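  // claim_next() hands out survivor regions by atomically incrementing this
  // index, so resetting it to zero makes all of them claimable again.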
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) {
    return false;
  }

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

// Returns the maximum number of workers to be used in a concurrent
// phase based on the number of GC workers being used in a STW
// phase.
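// For example, 4 STW workers map to MAX2((4 + 2) / 4, 1U) == 1 concurrent
// worker, and 13 STW workers map to (13 + 2) / 4 == 3.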
static uint scale_concurrent_worker_threads(uint num_gc_workers) {
  return MAX2((num_gc_workers + 2) / 4, 1U);
}

G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* prev_bitmap_storage,
                                   G1RegionToSpaceMapper* next_bitmap_storage) :
  // _cm_thread set inside the constructor
  _g1h(g1h),
  _completed_initialization(false),

  _mark_bitmap_1(),
  _mark_bitmap_2(),
  _prev_mark_bitmap(&_mark_bitmap_1),
  _next_mark_bitmap(&_mark_bitmap_2),

  _heap(_g1h->reserved_region()),

  _root_regions(),

  _global_mark_stack(),

  // _finger set in set_non_marking_state

  _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
  _max_num_tasks(ParallelGCThreads),
  // _num_active_tasks set in set_non_marking_state()
  // _tasks set inside the constructor

  _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
  _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),
  _total_cleanup_time(0.0),

  _accum_task_vtime(NULL),

  _concurrent_workers(NULL),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
{
  _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start ConcurrentMark thread.
  _cm_thread = new ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "CGC_lock must be initialized");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
    // Calculate the number of concurrent worker threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
  }

  assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _num_concurrent_workers = ConcGCThreads;
  _max_concurrent_workers = _num_concurrent_workers;

  _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
  _concurrent_workers->initialize_workers();

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());

    _accum_task_vtime[i] = 0.0;
  }

  reset_at_marking_complete();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  _has_aborted = false;

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different numbers of active
  // threads. So, it's easiest to have all of them ready.
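  // (reset() runs once per cycle, from pre_initial_mark(), so every task is
  // reinitialized even if later phases end up using fewer of them.)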
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(_next_mark_bitmap);
  }

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < max_regions; i++) {
    _top_at_rebuild_starts[i] = NULL;
    _region_mark_stats[i].clear();
  }
}

void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
  for (uint j = 0; j < _max_num_tasks; ++j) {
    _tasks[j]->clear_mark_stats_cache(region_idx);
  }
  _top_at_rebuild_starts[region_idx] = NULL;
  _region_mark_stats[region_idx].clear();
}

void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
  uint const region_idx = r->hrm_index();
  if (r->is_humongous()) {
    assert(r->is_starts_humongous(), "Got humongous continues region here");
    uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
    for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
      clear_statistics_in_region(j);
    }
  } else {
    clear_statistics_in_region(region_idx);
  }
}

static void clear_mark_if_set(G1CMBitMap* bitmap, HeapWord* addr) {
  if (bitmap->is_marked(addr)) {
    bitmap->clear(addr);
  }
}

void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
  assert_at_safepoint_on_vm_thread();

  // Need to clear all mark bits of the humongous object.
  clear_mark_if_set(_prev_mark_bitmap, r->bottom());
  clear_mark_if_set(_next_mark_bitmap, r->bottom());

  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  clear_statistics(r);
}

void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    uint max_regions = _g1h->max_regions();
    for (uint i = 0; i < max_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  _finger = _heap.start();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap.end()));
  }
}

void G1ConcurrentMark::reset_at_marking_complete() {
  // We set the global marking state to some default values when we're
  // not doing marking.
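  // reset_marking_for_restart() doubles as the generic "clear everything"
  // helper here: with no overflow pending it simply empties the mark stack
  // and task queues and resets the global finger.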
  reset_marking_for_restart();
  _num_active_tasks = 0;
}

G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
    }

    virtual bool do_heap_region(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ?
        cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }

  bool is_complete() {
    return _cl.is_complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cm_thread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");

  clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);

  // Repeat the asserts from above.
  guarantee(cm_thread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert_at_safepoint_on_vm_thread();
  clear_bitmap(_prev_mark_bitmap, workers, false);
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool do_heap_region(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
    // that the result of the get_next_marked_addr() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
  CheckBitmapClearHRClosure cl(_next_mark_bitmap);
  _g1h->heap_region_iterate(&cl);
  return cl.is_complete();
}

class NoteStartOfMarkHRClosure : public HeapRegionClosure {
public:
  bool do_heap_region(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::pre_initial_mark() {
  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
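  // note_start_of_marking() snapshots each region's current top as the next
  // top-at-mark-start (TAMS); objects allocated above it during the cycle
  // are implicitly considered live.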
  NoteStartOfMarkHRClosure startcl;
  _g1h->heap_region_iterate(&startcl);
}

void G1ConcurrentMark::post_initial_mark() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for a Full GC or an evacuation pause to occur while it
 * is waiting. This is actually safe, since entering the sync barrier
 * is one of the last things do_marking_step() does, and it doesn't
 * manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class G1CMConcurrentMarkingTask : public AbstractGangTask {
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");

      G1CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          task->do_marking_step(G1ConcMarkStepDurationMillis,
                                true  /* do_termination */,
                                false /* is_serial */);

          _cm->do_yield_check();
        } while (!_cm->has_aborted() && task->has_aborted());
      }
      task->record_end_time();
      guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
    AbstractGangTask("Concurrent Mark"), _cm(cm) { }

  ~G1CMConcurrentMarkingTask() { }
};

uint G1ConcurrentMark::calc_active_marking_workers() {
  uint result = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    result = _max_concurrent_workers;
  } else {
    result =
      AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
                                                      1, /* Minimum workers */
                                                      _num_concurrent_workers,
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale the result down by scale_concurrent_worker_threads() because
    // that scaling has already gone into "_max_concurrent_workers".
  }
  assert(result > 0 && result <= _max_concurrent_workers,
         "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
         _max_concurrent_workers, result);
  return result;
}

void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
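  // Survivor regions were filled by the evacuation pause that started this
  // cycle, so their NTAMS is still bottom() (asserted below): every object in
  // them is implicitly live and merely needs its references scanned here.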
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
  G1ConcurrentMark* _cm;
public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scan_root_region(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _num_concurrent_workers = MIN2(calc_active_marking_workers(),
                                   // We distribute work on a per-region basis, so starting
                                   // more threads than that is useless.
                                   root_regions()->num_root_regions());
    assert(_num_concurrent_workers <= _max_concurrent_workers,
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
    _concurrent_workers->run_task(&task, _num_concurrent_workers);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->collector_state()->set_clearing_next_bitmap(false);

  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow = false;

  _num_concurrent_workers = calc_active_marking_workers();

  uint active_workers = MAX2(1U, _num_concurrent_workers);

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
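  // update_active_workers() returns the worker count actually in effect,
  // which is what the terminator and barriers below must be sized for.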
  active_workers = _concurrent_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}

void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc, phases) trace(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(type, vo, buffer);
  }

  verifier->check_bitmaps(caller);
}

class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;

  G1PrintRegionLivenessInfoClosure _cl;

  uint _num_regions_selected_for_rebuild; // The number of regions actually selected for rebuild.

  void update_remset_before_rebuild(HeapRegion* hr) {
    G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker();

    size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize;
    bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
    if (selected_for_rebuild) {
      _num_regions_selected_for_rebuild++;
    }
    _cm->update_top_at_rebuild_start(hr);
  }

  void distribute_marked_bytes(HeapRegion* hr, size_t marked_words) {
    uint const region_idx = hr->hrm_index();
    uint num_regions_in_humongous = (uint)G1CollectedHeap::humongous_obj_size_in_regions(marked_words);

    for (uint i = region_idx; i < (region_idx + num_regions_in_humongous); i++) {
      HeapRegion* const r = _g1h->region_at(i);
      size_t const words_to_add = MIN2(HeapRegion::GrainWords, marked_words);
      assert(words_to_add > 0, "Out of space to distribute before end of humongous object in region %u (starts %u)", i, region_idx);

      log_trace(gc, marking)("Adding " SIZE_FORMAT " words to humongous region %u (%s)",
                             words_to_add, i, r->get_type_str());
      r->add_to_marked_bytes(words_to_add * HeapWordSize);
      marked_words -= words_to_add;
    }
    assert(marked_words == 0,
           SIZE_FORMAT " words left after distributing space across %u regions",
           marked_words, num_regions_in_humongous);
  }

  void update_marked_bytes(HeapRegion* hr) {
    uint const region_idx = hr->hrm_index();
    size_t marked_words = _cm->liveness(region_idx);
    // The marking attributes the object's size completely to the humongous starts
    // region. We need to distribute this value across the entire set of regions a
    // humongous object spans.
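    // For example, the liveness of an object spanning five regions is all
    // recorded on the starts region; distribute_marked_bytes() re-credits
    // GrainWords to each full region and the remainder to the last one.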
    if (hr->is_humongous()) {
      assert(hr->is_starts_humongous() || marked_words == 0,
             "Should not have marked words " SIZE_FORMAT " in non-starts humongous region %u (%s)",
             marked_words, region_idx, hr->get_type_str());

      if (marked_words > 0) {
        distribute_marked_bytes(hr, marked_words);
      }
    } else {
      log_trace(gc, marking)("Adding " SIZE_FORMAT " words to region %u (%s)", marked_words, region_idx, hr->get_type_str());
      hr->add_to_marked_bytes(marked_words * HeapWordSize);
    }
  }

public:
  G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm) :
    _g1h(g1h), _cm(cm), _cl("Post-Marking"), _num_regions_selected_for_rebuild(0) { }

  virtual bool do_heap_region(HeapRegion* r) {
    update_remset_before_rebuild(r);
    update_marked_bytes(r);
    if (log_is_enabled(Trace, gc, liveness)) {
      _cl.do_heap_region(r);
    }
    r->note_end_of_marking();
    return false;
  }

  uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
};

class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
  G1CollectedHeap* _g1h;
public:
  G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  virtual bool do_heap_region(HeapRegion* r) {
    _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r);
    return false;
  }
};

void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* g1p = _g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");

  {
    GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work(false /* clear_all_soft_refs */);

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases) debug("Flush Task Caches");
      flush_all_task_caches();
    }

    // Install newly created mark bitmap as "prev".
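    // After the swap the just-built bitmap is the "prev" (complete) bitmap
    // and the old prev bitmap becomes "next"; swap_mark_bitmaps() also flags
    // it to be cleared concurrently for the following cycle.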
    swap_mark_bitmaps();
    {
      GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking Before Rebuild");
      G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this);
      _g1h->heap_region_iterate(&cl);
      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_regions(), cl.num_selected_for_rebuild());
    }
    {
      GCTraceTime(Debug, gc, phases) debug("Reclaim Empty Regions");
      reclaim_empty_regions();
    }

    // Clean out dead classes
    if (ClassUnloadingWithConcurrentMark) {
      GCTraceTime(Debug, gc, phases) debug("Purge Metaspace");
      ClassLoaderDataGraph::purge();
    }

    compute_new_sizes();

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark after");

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    reset_at_marking_complete();
  } else {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  {
    GCTraceTime(Debug, gc, phases) debug("Report Object Count");
    report_object_count(mark_finished);
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

class G1CleanupTask : public AbstractGangTask {
  // Per-region work during the Cleanup pause.
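  // A region is reclaimed only if it has allocated space but no live data
  // and is neither young nor archive; all other regions just get their
  // remembered sets trimmed via do_cleanup_work().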
  class G1CleanupRegionsClosure : public HeapRegionClosure {
    G1CollectedHeap* _g1h;
    size_t _freed_bytes;
    FreeRegionList* _local_cleanup_list;
    uint _old_regions_removed;
    uint _humongous_regions_removed;
    HRRSCleanupTask* _hrrs_cleanup_task;

  public:
    G1CleanupRegionsClosure(G1CollectedHeap* g1,
                            FreeRegionList* local_cleanup_list,
                            HRRSCleanupTask* hrrs_cleanup_task) :
      _g1h(g1),
      _freed_bytes(0),
      _local_cleanup_list(local_cleanup_list),
      _old_regions_removed(0),
      _humongous_regions_removed(0),
      _hrrs_cleanup_task(hrrs_cleanup_task) { }

    size_t freed_bytes() { return _freed_bytes; }
    uint old_regions_removed() const { return _old_regions_removed; }
    uint humongous_regions_removed() const { return _humongous_regions_removed; }

    bool do_heap_region(HeapRegion* hr) {
      if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
        _freed_bytes += hr->used();
        hr->set_containing_set(NULL);
        if (hr->is_humongous()) {
          _humongous_regions_removed++;
          _g1h->free_humongous_region(hr, _local_cleanup_list);
        } else {
          _old_regions_removed++;
          _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */);
        }
        hr->clear_cardtable();
        _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
        log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
      } else {
        hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
      }

      return false;
    }
  };

  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1CleanupTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
    AbstractGangTask("G1 Cleanup"),
    _g1h(g1h),
    _cleanup_list(cleanup_list),
    _hrclaimer(n_workers) {

    HeapRegionRemSet::reset_for_cleanup_tasks();
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1CleanupRegionsClosure cl(_g1h,
                               &local_cleanup_list,
                               &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
    assert(cl.is_complete(), "Shouldn't have aborted!");

    // Now update the old/humongous region sets
    _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(cl.freed_bytes());

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

void G1ConcurrentMark::reclaim_empty_regions() {
  WorkGang* workers = _g1h->workers();
  FreeRegionList empty_regions_list("Empty Regions After Mark List");

  G1CleanupTask cl(_g1h, &empty_regions_list, workers->active_workers());
  workers->run_task(&cl);

  if (!empty_regions_list.is_empty()) {
    log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
    // Now print the empty regions list.
    G1HRPrinter* hrp = _g1h->hr_printer();
    if (hrp->is_active()) {
      FreeRegionListIterator iter(&empty_regions_list);
      while (iter.more_available()) {
        HeapRegion* hr = iter.get_next();
        hrp->cleanup(hr);
      }
    }
    // And actually make them available.
    _g1h->prepend_to_freelist(&empty_regions_list);
  }
}

void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->g1mm()->update_sizes();
}

void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* g1p = _g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup before");

  {
    GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild");
    G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  }

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
    _g1h->heap_region_iterate(&cl);
  }

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for Cleanup to finish.
  _g1h->increment_total_collections();

  // Local statistics
  double recent_cleanup_time = (os::elapsedTime() - start);
  _total_cleanup_time += recent_cleanup_time;
  _cleanup_times.add(recent_cleanup_time);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup");
    _g1h->g1_policy()->record_concurrent_mark_cleanup_end();
  }
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a worker thread (for serial reference
// processing the G1CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
// of the workers interfering with each other, which could occur when
// operating on the global stack.
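//
// The closure stops to drain its queues after every G1RefProcDrainInterval
// references it keeps alive (see _ref_counter below).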

class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task),
    _ref_counter_limit(G1RefProcDrainInterval), _is_serial(is_serial) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      _task->deal_with_reference(p);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  WorkGang* _workers;
  uint _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          G1ConcurrentMark* cm,
                          WorkGang* workers,
                          uint n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy : public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask& _proc_task;
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       G1ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    ResourceMark rm;
    HandleMark hm;
    G1CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() know
  // how many workers to wait for.
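  // set_concurrency() also resizes the two overflow barriers and the
  // parallel task terminator to the new worker count.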
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&proc_task_proxy);
}

class G1CMRefEnqueueTaskProxy : public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() know
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&enq_task_proxy);
}

void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark hm;

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(_g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Set the soft reference policy
    rp->setup_policy(clear_all_soft_refs);
    assert(_global_mark_stack.is_empty(), "mark stack should be empty");

    // Instances of the 'Keep Alive' and 'Complete GC' closures used
    // in serial reference processing. Note these closures are also
    // used for serially processing (by the current thread) the
    // JNI references during parallel reference processing.
    //
    // These closures do not need to synchronize with the worker
    // threads involved in parallel reference processing as these
    // instances are executed serially by the current thread (e.g.
    // reference processing is not multi-threaded and is thus
    // performed by the current thread instead of a gang worker).
    //
    // The gang tasks involved in parallel reference processing create
    // their own instances of these closures, which do their own
    // synchronization among themselves.
    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);

    // We need at least one active thread. If reference processing
    // is not multi-threaded we use the current (VMThread) thread,
    // otherwise we use the work gang from the G1CollectedHeap and
    // we utilize all the worker threads we can.
    bool processing_is_mt = rp->processing_is_mt();
    uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
    active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);

    // Parallel processing task executor.
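    // The executor is handed to the reference processor only when processing
    // is MT; otherwise it stays NULL and the serial closures above run on
    // the calling (VM) thread.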
    G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
                                              _g1h->workers(), active_workers);
    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);

    // Set the concurrency level. The phase was already set prior to
    // executing the remark task.
    set_concurrency(active_workers);

    // Set the degree of MT processing here. If the discovery was done MT,
    // the number of threads involved during discovery could differ from
    // the number of active workers. This is OK as long as the discovered
    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);

    ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());

    // Process the weak references.
    const ReferenceProcessorStats& stats =
        rp->process_discovered_references(&g1_is_alive,
                                          &g1_keep_alive,
                                          &g1_drain_mark_stack,
                                          executor,
                                          &pt);
    _gc_tracer_cm->report_gc_reference_stats(stats);
    pt.print_all_references();

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(has_overflown() || _global_mark_stack.is_empty(),
           "Mark stack should be empty (unless it has overflown)");

    assert(rp->num_q() == active_workers, "why not");

    rp->enqueue_discovered_references(executor, &pt);

    rp->verify_no_references_recorded();

    pt.print_enqueue_phase();

    assert(!rp->discovery_enabled(), "Post condition");
  }

  assert(has_overflown() || _global_mark_stack.is_empty(),
         "Mark stack should be empty (unless it has overflown)");

  {
    GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
    WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
  }

  if (has_overflown()) {
    // We cannot trust g1_is_alive if the marking stack overflowed
    return;
  }

  assert(_global_mark_stack.is_empty(), "Marking should have completed");

  // Unload Klasses, String, Symbols, Code Cache, etc.
  if (ClassUnloadingWithConcurrentMark) {
    GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
    bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
    _g1h->complete_cleaning(&g1_is_alive, purged_classes);
  } else {
    GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
    // No need to clean string table and symbol table as they are treated as strong roots when
    // class unloading is disabled.
    _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
  }
}

// When sampling object counts, we already swapped the mark bitmaps, so we need to use
// the prev bitmap to determine liveness.
class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
  G1CollectedHeap* _g1;
public:
  G1ObjectCountIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) { }

  bool do_object_b(oop obj) {
    HeapWord* addr = (HeapWord*)obj;
    return addr != NULL &&
           (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_dead(obj));
  }
};

void G1ConcurrentMark::report_object_count(bool mark_completed) {
  // Depending on whether marking completed, liveness needs to be determined
  // using either the next or the prev bitmap.
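  // (When marking completed, remark has already swapped the bitmaps (see
  // swap_mark_bitmaps() below), so the fresh marking information is
  // consulted through the prev bitmap via is_obj_dead(); otherwise it is
  // still in the next bitmap, which G1CMIsAliveClosure consults.)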
  if (mark_completed) {
    G1ObjectCountIsAliveClosure is_alive(_g1h);
    _gc_tracer_cm->report_object_count_after_gc(&is_alive);
  } else {
    G1CMIsAliveClosure is_alive(_g1h);
    _gc_tracer_cm->report_object_count_after_gc(&is_alive);
  }
}

void G1ConcurrentMark::swap_mark_bitmaps() {
  G1CMBitMap* temp = _prev_mark_bitmap;
  _prev_mark_bitmap = _next_mark_bitmap;
  _next_mark_bitmap = temp;
  _g1h->collector_state()->set_clearing_next_bitmap(true);
}

// Closure for marking entries in SATB buffers.
class G1CMSATBBufferClosure : public SATBBufferClosure {
private:
  G1CMTask* _task;
  G1CollectedHeap* _g1h;

  // This is very similar to G1CMTask::deal_with_reference, but with
  // more relaxed requirements for the argument, so this must be more
  // circumspect about treating the argument as an object.
  void do_entry(void* entry) const {
    _task->increment_refs_reached();
    oop const obj = static_cast<oop>(entry);
    _task->make_reference_grey(obj);
  }

public:
  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};

class G1RemarkThreadsClosure : public ThreadClosure {
  G1CMSATBBufferClosure _cm_satb_cl;
  G1CMOopClosure _cm_cl;
  MarkingCodeBlobClosure _code_cl;
  int _thread_parity;

public:
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
    _cm_satb_cl(task, g1h),
    _cm_cl(g1h, task),
    _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
    _thread_parity(Threads::thread_claim_parity()) {}

  void do_thread(Thread* thread) {
    if (thread->is_Java_thread()) {
      if (thread->claim_oops_do(true, _thread_parity)) {
        JavaThread* jt = (JavaThread*)thread;

        // In theory it should not be necessary to explicitly walk the nmethods to find
        // roots for concurrent marking; however, oops reachable from nmethods have very
        // complex lifecycles:
        // * Alive if on the stack of an executing method
        // * Weakly reachable otherwise
        // Some objects reachable from nmethods, such as the class loader (or klass_holder)
        // of the receiver, should be live by the SATB invariant, but other oops recorded
        // in nmethods may behave differently.
        jt->nmethods_do(&_code_cl);

        jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
      }
    } else if (thread->is_VM_thread()) {
      if (thread->claim_oops_do(true, _thread_parity)) {
        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
      }
    }
  }
};

class G1CMRemarkTask : public AbstractGangTask {
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    {
      ResourceMark rm;
      HandleMark hm;

      G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
      Threads::threads_do(&threads_f);
    }

    do {
      task->do_marking_step(1000000000.0 /* something very large */,
                            true         /* do_termination       */,
                            false        /* is_serial            */);
    } while (task->has_aborted() && !_cm->has_overflown());
    // If we overflow, then we do not want to restart. We instead
    // want to abort remark and do concurrent marking again.
    task->record_end_time();
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    AbstractGangTask("Par Remark"), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void G1ConcurrentMark::finalize_marking() {
  ResourceMark rm;
  HandleMark hm;

  _g1h->ensure_parsability(false);

  // This is remark, so we'll use up all active threads.
  uint active_workers = _g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at its
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the gang in the task.

  {
    StrongRootsScope srs(active_workers);

    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    _g1h->workers()->run_task(&remarkTask);
  }

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}

void G1ConcurrentMark::flush_all_task_caches() {
  size_t hits = 0;
  size_t misses = 0;
  for (uint i = 0; i < _max_num_tasks; i++) {
    Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
    hits += stats.first;
    misses += stats.second;
  }
  size_t sum = hits + misses;
  log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
                       hits, misses, percent_of(hits, sum));
}

void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
  _prev_mark_bitmap->clear_range(mr);
}

HeapRegion*
G1ConcurrentMark::claim_region(uint worker_id) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  while (finger < _heap.end()) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    HeapRegion* curr_region = _g1h->heap_region_containing(finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return NULL as we always scan
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
    if (res == finger && curr_region != NULL) {
      // we succeeded
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = curr_region->next_top_at_mark_start();

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (limit > bottom) {
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        // we return NULL and the caller should try calling
        // claim_region() again.
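        // (A region can be in this state when, for example, it was freed
        // by an evacuation pause after marking started, so there is
        // nothing in it to scan; see update_region_limit() below.)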
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

#ifndef PRODUCT
class VerifyNoCSetOops {
  G1CollectedHeap* _g1h;
  const char* _phase;
  int _info;

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(G1TaskQueueEntry task_entry) const {
    if (task_entry.is_array_slice()) {
      guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
      return;
    }
    guarantee(oopDesc::is_oop(task_entry.obj()),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(task_entry.obj()), _phase, _info);
    guarantee(!_g1h->is_in_cset(task_entry.obj()),
              "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
              p2i(task_entry.obj()), _phase, _info);
  }
};

void G1ConcurrentMark::verify_no_cset_oops() {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != NULL && global_finger < _heap.end()) {
    // Since we always iterate over all regions, we might get a NULL HeapRegion
    // here.
    HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
    guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
  for (uint i = 0; i < _num_concurrent_workers; ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _heap.end()) {
      // See above note on the global finger verification.
      HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
      guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
                !task_hr->in_collection_set(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
    }
  }
}
#endif // PRODUCT

void G1ConcurrentMark::rebuild_rem_set_concurrently() {
  _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset);
}

void G1ConcurrentMark::print_stats() {
  if (!log_is_enabled(Debug, gc, stats)) {
    return;
  }
  log_debug(gc, stats)("---------------------------------------------------------------------");
  for (size_t i = 0; i < _num_active_tasks; ++i) {
    _tasks[i]->print_stats();
    log_debug(gc, stats)("---------------------------------------------------------------------");
  }
}

void G1ConcurrentMark::concurrent_cycle_abort() {
  if (!cm_thread()->during_cycle() || _has_aborted) {
    // We haven't started a concurrent cycle or we have already aborted it.
    // No need to do anything.
    return;
  }

  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
  // concurrent bitmap clearing.
  {
    GCTraceTime(Debug, gc) debug("Clear Next Bitmap");
    clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
  }
  // Note we cannot clear the previous marking bitmap here
  // since VerifyDuringGC verifies the objects marked during
  // a full GC against the previous bitmap.

  // Empty mark stack
  reset_marking_for_restart();
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
}

static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

void G1ConcurrentMark::print_summary_info() {
  Log(gc, marking) log;
  if (!log.is_trace()) {
    return;
  }

  log.trace(" Concurrent marking:");
  print_ms_time_info(" ", "init marks", _init_times);
  print_ms_time_info(" ", "remarks", _remark_times);
  {
    print_ms_time_info(" ", "final marks", _remark_mark_times);
    print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
  }
  print_ms_time_info(" ", "cleanups", _cleanup_times);
  log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
            _total_cleanup_time, (_cleanup_times.num() > 0 ?
                                  _total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
  log.trace(" Total stop_world time = %8.2f s.",
            (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
  log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
            cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
}

void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _concurrent_workers->print_worker_threads_on(st);
}

void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
  _concurrent_workers->threads_do(tc);
}

void G1ConcurrentMark::print_on_error(outputStream* st) const {
  st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
               p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
  _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
  _next_mark_bitmap->print_on_error(st, " Next Bits: ");
}

static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
  ReferenceProcessor* result = g1h->ref_processor_cm();
  assert(result != NULL, "CM reference processor should not be NULL");
  return result;
}

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1CMTask* task)
  : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _task(task)
{ }

void G1CMTask::setup_for_region(HeapRegion* hr) {
  assert(hr != NULL,
         "claim_region() should have filtered out NULL regions");
  _curr_region = hr;
  _finger      = hr->bottom();
  update_region_limit();
}

void G1CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and in fact we do not need to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void G1CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  clear_region_fields();
}

void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region  = NULL;
  _finger       = NULL;
  _region_limit = NULL;
}

void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
  guarantee(next_mark_bitmap != NULL, "invariant");
  _next_mark_bitmap = next_mark_bitmap;
  clear_region_fields();

  _calls                     = 0;
  _elapsed_time_ms           = 0.0;
  _termination_time_ms       = 0.0;
  _termination_start_time_ms = 0.0;

  _mark_stats_cache.reset();
}

bool G1CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void G1CMTask::regular_clock_call() {
  if (has_aborted()) {
    return;
  }

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!_cm->concurrent()) {
    return;
  }

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    return;
  }

  // (4) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    return;
  }

  // (5) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
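  // (This check is suppressed while the task itself is draining
  // buffers; see the _draining_satb_buffers flag set in
  // drain_satb_buffers().)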
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // We do need to process SATB buffers; abort and restart
    // the marking task to do so.
    set_has_aborted();
    return;
  }
}

void G1CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.
  _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}

void G1CMTask::move_entries_to_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the local queue.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  size_t n = 0;
  G1TaskQueueEntry task_entry;
  while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
    buffer[n] = task_entry;
    ++n;
  }
  if (n < G1CMMarkStack::EntriesPerChunk) {
    buffer[n] = G1TaskQueueEntry();
  }

  if (n > 0) {
    if (!_cm->mark_stack_push(buffer)) {
      set_has_aborted();
    }
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}

bool G1CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  if (!_cm->mark_stack_pop(buffer)) {
    return false;
  }

  // We did actually pop at least one entry.
  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
    G1TaskQueueEntry task_entry = buffer[i];
    if (task_entry.is_null()) {
      break;
    }
    assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
    bool success = _task_queue->push(task_entry);
    // We only call this when the local queue is empty or under a
    // given target limit. So, we do not expect this push to fail.
    assert(success, "invariant");
  }

  // This operation was quite expensive, so decrease the limits
  decrease_limits();
  return true;
}

void G1CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) {
    return;
  }

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
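  // (For example, with a hypothetical local queue capacity of 16K entries
  // and GCDrainStackTargetSize == 64, the partial target below is
  // MIN2(16384 / 3, 64) == 64, so draining stops once at most 64 entries
  // remain and the rest stay available for stealing.)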
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    G1TaskQueueEntry entry;
    bool ret = _task_queue->pop_local(entry);
    while (ret) {
      scan_task_entry(entry);
      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(entry);
      }
    }
  }
}

void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) {
    return;
  }

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the
  // raciness of the mark stack size update we might in fact drop below the
  // target. But, this is not a problem.
  // In case of total draining, we simply process until the global mark
  // stack is totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}

// The SATB queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) {
    return;
  }

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    regular_clock_call();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         _cm->concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // Again, this was a potentially expensive operation, so decrease the
  // limits to get the regular clock call early.
  decrease_limits();
}

void G1CMTask::clear_mark_stats_cache(uint region_idx) {
  _mark_stats_cache.reset(region_idx);
}

Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
  return _mark_stats_cache.evict_all();
}

void G1CMTask::print_stats() {
  log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
  log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                       _elapsed_time_ms, _termination_time_ms);
  log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
                       _step_times_ms.num(),
                       _step_times_ms.avg(),
                       _step_times_ms.sd(),
                       _step_times_ms.maximum(),
                       _step_times_ms.sum());
  size_t const hits = _mark_stats_cache.hits();
  size_t const misses = _mark_stats_cache.misses();
  log_debug(gc, stats)(" Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f",
                       hits, misses, percent_of(hits, hits + misses));
}

bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
  return _task_queues->steal(worker_id, hash_seed, task_entry);
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

    (1) Marking Bitmap. If there are gray objects that appear only
    on the bitmap (this happens either when dealing with an overflow
    or when the initial marking phase has simply marked the roots
    and didn't push them on the stack), then tasks claim heap
    regions whose bitmap they then scan to find gray objects. A
    global finger indicates where the end of the last claimed region
    is. A local finger indicates how far into the region a task has
    scanned. The two fingers are used to determine how to gray an
    object (i.e. whether simply marking it is OK, as it will be
    visited by a task in the future, or whether it needs to be also
    pushed on a stack).

    (2) Local Queue. The local queue of the task which is accessed
    reasonably efficiently by the task. Other tasks can steal from
    it when they run out of work. Throughout the marking phase, a
    task attempts to keep its local queue short but not totally
    empty, so that entries are available for stealing by other
    tasks.
    Only when there is no more work, a task will totally
    drain its local queue.

    (3) Global Mark Stack. This handles local queue overflow. During
    marking only sets of entries are moved between it and the local
    queues, as access to it requires a mutex and more fine-grain
    interaction with it which might cause contention. If it
    overflows, then the marking phase should restart and iterate
    over the bitmap to identify gray objects. Throughout the marking
    phase, tasks attempt to keep the global mark stack at a small
    length but not totally empty, so that entries are available for
    popping by other tasks. Only when there is no more work, tasks
    will totally drain the global mark stack.

    (4) SATB Buffer Queue. This is where completed SATB buffers are
    made available. Buffers are regularly removed from this queue
    and scanned for roots, so that the queue doesn't get too
    long. During remark, all completed buffers are processed, as
    well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

    (1) When the marking phase has been aborted (after a Full GC).

    (2) When a global overflow (on the global stack) has been
    triggered. Before the task aborts, it will actually sync up with
    the other tasks to ensure that all the marking data structures
    (local queues, stacks, fingers etc.) are re-initialized so that
    when do_marking_step() completes, the marking phase can
    immediately restart.

    (3) When enough completed SATB buffers are available. The
    do_marking_step() method only tries to drain SATB buffers right
    at the beginning. So, if enough buffers are available, the
    marking step aborts and the SATB buffers are processed at
    the beginning of the next invocation.

    (4) To yield. When we have to yield, we abort and yield
    right at the end of do_marking_step(). This saves us from a lot
    of hassle as, by yielding, we might allow a Full GC. If this
    happens then objects will be compacted underneath our feet, the
    heap might shrink, etc. We save checking for this by just
    aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reach a given limit. Additional invocations to
    the clock method have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
    vtime too regularly, as it is quite expensive. So, once it was in
    place, it was natural to piggy-back all the other conditions on it
    too and not constantly check them throughout the code.
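
    A typical caller therefore drives do_marking_step() in a loop. The
    sketch below mirrors the remark and reference-processing call sites
    earlier in this file (task and cm stand for the claimed G1CMTask and
    the G1ConcurrentMark instance):

      do {
        task->do_marking_step(target_ms,
                              true  /* do_termination */,
                              false /* is_serial */);
      } while (task->has_aborted() && !cm->has_overflown());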

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/

void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  _start_time_ms = os::elapsedVTime() * 1000.0;

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm);
  G1CMOopClosure cm_oop_closure(_g1h, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet.
      // Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_next_mark_bitmap->is_marked(mr.start())) {
          // The object is marked - apply the closure
          bitmap_closure.do_addr(mr.start());
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_addr() method. However, inside the
        // do_addr() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header.
        assert(_finger < _region_limit, "invariant");
        HeapWord* const new_finger = _finger + ((oop)_finger)->size();
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
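    // (In particular, claim_region() returns NULL after claiming an
    // empty region, not only when the heap has run out of regions,
    // which is why the loop below also checks out_of_regions().)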
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");
    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");
    while (!has_aborted()) {
      G1TaskQueueEntry entry;
      if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
        scan_task_entry(entry);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The G1CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
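    // (In the serial case there is no other task to synchronize with,
    // so termination is treated as immediately successful below.)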
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task; the
      // caller will restart it and we can hopefully find more things to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures.
      }

      clear_region_fields();
      flush_mark_stats_cache();

      if (!is_serial) {
        // If we're executing the concurrent phase of marking, reset the marking
        // state; otherwise the marking state is reset after reference processing,
        // during the remark pause.
        // If we reset here as a result of an overflow during the remark we will
        // see assertion failures from any subsequent set_concurrency_and_phase()
        // calls.
        if (_cm->concurrent() && _worker_id == 0) {
          // Worker 0 is responsible for clearing the global data structures because
          // of an overflow. During STW we should not clear the overflow flag (in
          // G1ConcurrentMark::reset_marking_state()) since we rely on it being true
          // when we exit this method to abort the pause and restart concurrent
          // marking.
          _cm->reset_marking_for_restart();

          log_info(gc, marking)("Concurrent Mark reset for overflow");
        }

        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're during the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }
}

G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats,
                   uint max_regions) :
  _objArray_processor(this),
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _next_mark_bitmap(NULL),
  _task_queue(task_queue),
  _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_time_ms(0.0),
  _cm_oop_closure(NULL),
  _curr_region(NULL),
  _finger(NULL),
  _region_limit(NULL),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _hash_seed(17),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _termination_start_time_ms(0.0),
  _marking_step_diffs_ms()
{
  guarantee(task_queue != NULL, "invariant");

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT     " %-4s"
#define G1PPRL_TYPE_H_FORMAT   " %4s"
#define G1PPRL_STATE_FORMAT    " %-5s"
#define G1PPRL_STATE_H_FORMAT  " %5s"
#define G1PPRL_BYTE_FORMAT     " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT   " %9s"
#define G1PPRL_DOUBLE_FORMAT   " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0), _total_capacity_bytes(0),
  _total_prev_live_bytes(0), _total_next_live_bytes(0),
  _total_remset_bytes(0), _total_strong_code_roots_bytes(0)
{
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
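  // (With the formats above, the header line looks roughly like:
  //    ### type address-range used prev-live next-live gc-eff remset state code-roots
  //  followed by one such line per region from do_heap_region().)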
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "state", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "", "(bytes)");
}

bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
  const char* remset_type = r->rem_set()->get_short_state_str();

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_STATE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, remset_type, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usage to the remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          percent_of(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_prev_live_bytes),
                          percent_of(_total_prev_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_next_live_bytes),
                          percent_of(_total_next_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_strong_code_roots_bytes));
}