/*
 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1CardLiveData.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "gc/shared/weakProcessor.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
#include "utilities/growableArray.hpp"

bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  // We only partially drain the local queue and global stack.
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // If the has_aborted flag has been raised, we need to bail out of
  // the iteration.
  return !_task->has_aborted();
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();

  return true;
}

size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}

bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}

void G1CMMarkStack::expand() {
  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible.
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                    old_capacity, new_capacity);
  }
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }
}

void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
  elem->next = *list;
  *list = elem;
}

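// The chunk list and the free list below are shared between the marking
// threads. Updates are serialized by dedicated locks, taken without
// safepoint checks because these paths run both concurrently and inside
// a pause.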
void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
  TaskQueueEntryChunk* result = *list;
  if (result != NULL) {
    *list = (*list)->next;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != NULL) {
    _chunks_in_chunk_list--;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
  // wraparound of _hwm.
  if (_hwm >= _chunk_capacity) {
    return NULL;
  }

  size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
  if (cur_idx >= _chunk_capacity) {
    return NULL;
  }

  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
  result->next = NULL;
  return result;
}

bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == NULL) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}

G1CMRootRegions::G1CMRootRegions() :
  _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}

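// Root region scanning happens between the initial-mark pause and the
// start of concurrent marking proper: prepare_for_scan() runs during the
// pause, while claim_next() is called concurrently by the scanning
// workers.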
void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_list("Cleanup List"),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _global_mark_stack(),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set.
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal.
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / os::initial_active_processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  // So that the assertion in MarkingTaskQueue::task_queue doesn't fail.
  _active_tasks = _max_worker_id;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // So that the call below can read a sensible value.
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags.
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // We need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed.
  set_concurrent_marking_in_progress();
}

void G1ConcurrentMark::reset_marking_state() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();
  }

  clear_has_overflown();
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

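// The terminator and the two overflow barriers below must agree on the
// number of participating tasks; otherwise the termination and overflow
// protocols in do_marking_step() would wait for the wrong number of
// workers.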
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void G1ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

G1ConcurrentMark::~G1ConcurrentMark() {
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

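// Clears the given mark bitmap in parallel, in chunks of
// G1ClearBitMapTask::chunk_size() bytes, optionally joining the
// suspendible thread set so that clearing can yield to safepoints.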
class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
    }

    virtual bool doHeapRegion(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
  }

  bool is_complete() {
    return _cl.complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");

  clear_bitmap(_nextMarkBitMap, _parallel_workers, true);

  // Clear the live count data. If the marking has been aborted, the abort()
  // call already did that.
  if (!has_aborted()) {
    clear_live_data(_parallel_workers);
    DEBUG_ONLY(verify_live_data_clear());
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
  clear_bitmap(_prevMarkBitMap, workers, false);
}

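// Closure that checks, region by region, that no bits are set in the
// given bitmap; used to verify that the next mark bitmap is completely
// clear before a new cycle starts.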
class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
  bool _error;
public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
    // that the result of the get_next_marked_addr() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void G1ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // Enable ("weak") refs discovery.
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other threads to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // At this point everyone should have synced up and not be doing any
  // more work.

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // Let the task associated with worker 0 do this.
    if (worker_id == 0) {
      // Task 0 is responsible for clearing the global data structures.
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }
  }

  // After this, each task should reset its own data structures then
  // go into the second barrier.
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // At this point everything should be re-initialized and ready to go.
}

class G1CMConcurrentMarkingTask: public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      G1CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial */);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->do_yield_check();

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
                            ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~G1CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
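// With dynamic sizing (UseDynamicNumberOfGCThreads) the count adapts to
// the current number of non-daemon Java threads; otherwise the configured
// maximum number of marking threads is used.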
uint G1ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
                                                      1, /* Minimum workers */
                                                      parallel_marking_threads(),
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
         "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
         max_parallel_marking_threads(), n_conc_workers);
  return n_conc_workers;
}

void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;

public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr);
      hr = root_regions->claim_next();
    }
  }
};

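// Scan the root regions (currently only the survivor regions from the
// initial-mark pause) concurrently. An evacuation pause must wait for
// this scan to complete (see wait_until_scan_finished()) before it can
// start.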
void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
                                     // We distribute work on a per-region basis, so starting
                                     // more threads than that is useless.
                                     root_regions()->num_root_regions());
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
    _parallel_workers->run_task(&task, _parallel_marking_threads);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  // We might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _parallel_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()".
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

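// The remark pause: finishes marking with the world stopped, processes
// references and weak roots, and either completes the cycle or requests
// a restart of concurrent marking if the global mark stack overflowed.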
void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // World is stopped at this checkpoint.
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Remark Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed.
    set_non_marking_state();
  }

  // Statistics.
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  _gc_tracer_cm->report_object_count_after_gc(&is_alive);
}

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const uint old_regions_removed() { return _old_regions_removed; }
  const uint humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

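// Parallel task that applies G1NoteEndOfConcMarkClosure to every region
// and then merges the per-worker free lists into the global cleanup list.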
class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
    AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists.
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we cannot guarantee that we only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

void G1ConcurrentMark::cleanup() {
  // World is stopped at this checkpoint.
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  g1h->verifier()->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Cleanup Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  {
    GCTraceTime(Debug, gc)("Finalize Live Data");
    finalize_live_data();
  }

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc)("Verify Live Data");
    verify_live_data();
  }

  g1h->collector_state()->set_mark_in_progress(false);

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitmap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  uint n_workers = _g1h->workers()->active_workers();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
  g1h->workers()->run_task(&g1_par_note_end_task);
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    g1h->scrub_rem_set();
    _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
  }

  // This will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end();

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
  }

  g1h->verifier()->check_bitmaps("Cleanup End");

  g1h->verifier()->verify_region_sets_optional();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();
  g1h->allocation_context_stats().update_after_mark();
}

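// Runs in the concurrent mark thread after the cleanup pause: hands the
// regions on the cleanup list over to the secondary free list in batches
// of G1SecondaryFreeListAppendLength, so that allocation can start reusing
// them while cleanup is still in progress.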
void G1ConcurrentMark::complete_cleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                  "cleanup list has %u entries",
                                  _cleanup_list.length());

  // No one else should be accessing the _cleanup_list at this point,
  // so it is not necessary to take any locks.
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                      "appending %u entries to the secondary_free_list, "
                                      "cleanup list still has %u entries",
                                      tmp_free_list.length(),
                                      _cleanup_list.length());

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }
#ifndef PRODUCT
      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
#endif
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking.

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a worker thread (for serial reference
// processing the G1CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// state. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure: public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking.

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  WorkGang* _workers;
  uint _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          G1ConcurrentMark* cm,
                          WorkGang* workers,
                          uint n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask& _proc_task;
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       G1ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    ResourceMark rm;
    HandleMark hm;
    G1CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&proc_task_proxy);
}

class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&enq_task_proxy);
}

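// Reference processing and weak-root cleaning at remark time. Runs with
// the world stopped and reuses the marking tasks (do_marking_step) so
// that objects kept alive by references flow through the regular marking
// data structures.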
1605 G1CMIsAliveClosure g1_is_alive(g1h); 1606 1607 // Instances of the 'Keep Alive' and 'Complete GC' closures used 1608 // in serial reference processing. Note these closures are also 1609 // used for serially processing (by the current thread) the 1610 // JNI references during parallel reference processing. 1611 // 1612 // These closures do not need to synchronize with the worker 1613 // threads involved in parallel reference processing as these 1614 // instances are executed serially by the current thread (i.e. 1615 // reference processing is not multi-threaded and is thus 1616 // performed by the current thread instead of a gang worker). 1617 // 1618 // The gang tasks involved in parallel reference processing create 1619 // their own instances of these closures, which do their own 1620 // synchronization among themselves. 1621 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 1622 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 1623 1624 // Inner scope to exclude the cleaning of the string and symbol 1625 // tables from the displayed time. 1626 { 1627 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm); 1628 1629 ReferenceProcessor* rp = g1h->ref_processor_cm(); 1630 1631 // See the comment in G1CollectedHeap::ref_processing_init() 1632 // about how reference processing currently works in G1. 1633 1634 // Set the soft reference policy 1635 rp->setup_policy(clear_all_soft_refs); 1636 assert(_global_mark_stack.is_empty(), "mark stack should be empty"); 1637 1638 // We need at least one active thread. If reference processing 1639 // is not multi-threaded we use the current (VMThread) thread, 1640 // otherwise we use the work gang from the G1CollectedHeap and 1641 // we utilize all the worker threads we can. 1642 bool processing_is_mt = rp->processing_is_mt(); 1643 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 1644 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 1645 1646 // Parallel processing task executor. 1647 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 1648 g1h->workers(), active_workers); 1649 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 1650 1651 // Set the concurrency level. The phase was already set prior to 1652 // executing the remark task. 1653 set_concurrency(active_workers); 1654 1655 // Set the degree of MT processing here. If the discovery was done MT, 1656 // the number of threads involved during discovery could differ from 1657 // the number of active workers. This is OK as long as the discovered 1658 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 1659 rp->set_active_mt_degree(active_workers); 1660 1661 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q()); 1662 1663 // Process the weak references. 1664 const ReferenceProcessorStats& stats = 1665 rp->process_discovered_references(&g1_is_alive, 1666 &g1_keep_alive, 1667 &g1_drain_mark_stack, 1668 executor, 1669 &pt); 1670 _gc_tracer_cm->report_gc_reference_stats(stats); 1671 pt.print_all_references(); 1672 1673 // The do_oop work routines of the keep_alive and drain_marking_stack 1674 // oop closures will set the has_overflown flag if we overflow the 1675 // global marking stack.
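// (Sketch of how that happens, in terms of code that appears later in this
// file rather than anything new: the drain closures move entries in chunks
// via _cm->mark_stack_push(buffer) in G1CMTask::move_entries_to_global_stack();
// when the global stack cannot grow any further the push fails, the task
// sets has_aborted(), and the marking-wide has_overflown() state is raised -
// which is exactly the case the assert below allows for.)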
1676 1677 assert(has_overflown() || _global_mark_stack.is_empty(), 1678 "Mark stack should be empty (unless it has overflown)"); 1679 1680 assert(rp->num_q() == active_workers, "why not"); 1681 1682 rp->enqueue_discovered_references(executor, &pt); 1683 1684 rp->verify_no_references_recorded(); 1685 1686 pt.print_enqueue_phase(); 1687 1688 assert(!rp->discovery_enabled(), "Post condition"); 1689 } 1690 1691 { 1692 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm); 1693 WeakProcessor::unlink_or_oops_do(&g1_is_alive, &g1_keep_alive, &g1_drain_mark_stack); 1694 } 1695 1696 if (has_overflown()) { 1697 // We cannot trust g1_is_alive if the marking stack overflowed. 1698 return; 1699 } 1700 1701 assert(_global_mark_stack.is_empty(), "Marking should have completed"); 1702 1703 // Unload Klasses, Strings, Symbols, Code Cache, etc. 1704 if (ClassUnloadingWithConcurrentMark) { 1705 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm); 1706 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */); 1707 g1h->complete_cleaning(&g1_is_alive, purged_classes); 1708 } else { 1709 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm); 1710 // No need to clean the string and symbol tables as they are treated as strong roots when 1711 // class unloading is disabled. 1712 g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled()); 1713 1714 } 1715 } 1716 1717 void G1ConcurrentMark::swapMarkBitMaps() { 1718 G1CMBitMap* temp = _prevMarkBitMap; 1719 _prevMarkBitMap = _nextMarkBitMap; 1720 _nextMarkBitMap = temp; 1721 } 1722 1723 // Closure for marking entries in SATB buffers. 1724 class G1CMSATBBufferClosure : public SATBBufferClosure { 1725 private: 1726 G1CMTask* _task; 1727 G1CollectedHeap* _g1h; 1728 1729 // This is very similar to G1CMTask::deal_with_reference, but with 1730 // more relaxed requirements for the argument, so this must be more 1731 // circumspect about treating the argument as an object.
1732 void do_entry(void* entry) const { 1733 _task->increment_refs_reached(); 1734 oop const obj = static_cast<oop>(entry); 1735 _task->make_reference_grey(obj); 1736 } 1737 1738 public: 1739 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h) 1740 : _task(task), _g1h(g1h) { } 1741 1742 virtual void do_buffer(void** buffer, size_t size) { 1743 for (size_t i = 0; i < size; ++i) { 1744 do_entry(buffer[i]); 1745 } 1746 } 1747 }; 1748 1749 class G1RemarkThreadsClosure : public ThreadClosure { 1750 G1CMSATBBufferClosure _cm_satb_cl; 1751 G1CMOopClosure _cm_cl; 1752 MarkingCodeBlobClosure _code_cl; 1753 int _thread_parity; 1754 1755 public: 1756 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) : 1757 _cm_satb_cl(task, g1h), 1758 _cm_cl(g1h, g1h->concurrent_mark(), task), 1759 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 1760 _thread_parity(Threads::thread_claim_parity()) {} 1761 1762 void do_thread(Thread* thread) { 1763 if (thread->is_Java_thread()) { 1764 if (thread->claim_oops_do(true, _thread_parity)) { 1765 JavaThread* jt = (JavaThread*)thread; 1766 1767 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking; 1768 // however, oops reachable from nmethods have very complex lifecycles: 1769 // * Alive if on the stack of an executing method 1770 // * Weakly reachable otherwise 1771 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 1772 // live by the SATB invariant but other oops recorded in nmethods may behave differently. 1773 jt->nmethods_do(&_code_cl); 1774 1775 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 1776 } 1777 } else if (thread->is_VM_thread()) { 1778 if (thread->claim_oops_do(true, _thread_parity)) { 1779 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 1780 } 1781 } 1782 } 1783 }; 1784 1785 class G1CMRemarkTask: public AbstractGangTask { 1786 private: 1787 G1ConcurrentMark* _cm; 1788 public: 1789 void work(uint worker_id) { 1790 // Since all available tasks are actually started, we should 1791 // only proceed if we're supposed to be active. 1792 if (worker_id < _cm->active_tasks()) { 1793 G1CMTask* task = _cm->task(worker_id); 1794 task->record_start_time(); 1795 { 1796 ResourceMark rm; 1797 HandleMark hm; 1798 1799 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 1800 Threads::threads_do(&threads_f); 1801 } 1802 1803 do { 1804 task->do_marking_step(1000000000.0 /* something very large */, 1805 true /* do_termination */, 1806 false /* is_serial */); 1807 } while (task->has_aborted() && !_cm->has_overflown()); 1808 // If we overflow, then we do not want to restart. We instead 1809 // want to abort remark and do concurrent marking again.
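// (Stated in terms of the loop condition above: the loop exits either
// because do_marking_step() returned with has_aborted() clear - remark for
// this worker is complete - or because has_overflown() became true, in
// which case remark is abandoned and marking will be redone concurrently.)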
1810 task->record_end_time(); 1811 } 1812 } 1813 1814 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) : 1815 AbstractGangTask("Par Remark"), _cm(cm) { 1816 _cm->terminator()->reset_for_reuse(active_workers); 1817 } 1818 }; 1819 1820 void G1ConcurrentMark::checkpointRootsFinalWork() { 1821 ResourceMark rm; 1822 HandleMark hm; 1823 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1824 1825 GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm); 1826 1827 g1h->ensure_parsability(false); 1828 1829 // this is remark, so we'll use up all active threads 1830 uint active_workers = g1h->workers()->active_workers(); 1831 set_concurrency_and_phase(active_workers, false /* concurrent */); 1832 // Leave _parallel_marking_threads at its 1833 // value originally calculated in the G1ConcurrentMark 1834 // constructor and pass values of the active workers 1835 // through the gang in the task. 1836 1837 { 1838 StrongRootsScope srs(active_workers); 1839 1840 G1CMRemarkTask remarkTask(this, active_workers); 1841 // We will start all available threads, even if we decide that the 1842 // active_workers will be fewer. The extra ones will just bail out 1843 // immediately. 1844 g1h->workers()->run_task(&remarkTask); 1845 } 1846 1847 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1848 guarantee(has_overflown() || 1849 satb_mq_set.completed_buffers_num() == 0, 1850 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT, 1851 BOOL_TO_STR(has_overflown()), 1852 satb_mq_set.completed_buffers_num()); 1853 1854 print_stats(); 1855 } 1856 1857 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 1858 _prevMarkBitMap->clear_range(mr); 1859 } 1860 1861 HeapRegion* 1862 G1ConcurrentMark::claim_region(uint worker_id) { 1863 // "checkpoint" the finger 1864 HeapWord* finger = _finger; 1865 1866 // _heap_end will not change underneath our feet; it only changes at 1867 // yield points. 1868 while (finger < _heap_end) { 1869 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 1870 1871 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 1872 // Make sure that the reads below do not float before loading curr_region. 1873 OrderAccess::loadload(); 1874 // Above heap_region_containing may return NULL as we always scan and claim 1875 // until the end of the heap. In this case, just jump to the next region. 1876 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 1877 1878 // Is the gap between reading the finger and doing the CAS too long? 1879 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 1880 if (res == finger && curr_region != NULL) { 1881 // we succeeded 1882 HeapWord* bottom = curr_region->bottom(); 1883 HeapWord* limit = curr_region->next_top_at_mark_start(); 1884 1885 // notice that _finger == end cannot be guaranteed here since 1886 // someone else might have moved the finger even further 1887 assert(_finger >= end, "the finger should have moved forward"); 1888 1889 if (limit > bottom) { 1890 return curr_region; 1891 } else { 1892 assert(limit == bottom, 1893 "the region limit should be at bottom"); 1894 // we return NULL and the caller should try calling 1895 // claim_region() again.
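// (For reference, a sketch of the caller's side as it appears in
// G1CMTask::do_marking_step() later in this file - a NULL result simply
// sends the caller around its claiming loop again:
//
//   while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
//     HeapRegion* claimed_region = _cm->claim_region(_worker_id);
//     if (claimed_region != NULL) {
//       setup_for_region(claimed_region);
//     }
//     regular_clock_call();
//   }
// )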
1896 return NULL; 1897 } 1898 } else { 1899 assert(_finger > finger, "the finger should have moved forward"); 1900 // read it again 1901 finger = _finger; 1902 } 1903 } 1904 1905 return NULL; 1906 } 1907 1908 #ifndef PRODUCT 1909 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 1910 private: 1911 G1CollectedHeap* _g1h; 1912 const char* _phase; 1913 int _info; 1914 1915 public: 1916 VerifyNoCSetOops(const char* phase, int info = -1) : 1917 _g1h(G1CollectedHeap::heap()), 1918 _phase(phase), 1919 _info(info) 1920 { } 1921 1922 void operator()(G1TaskQueueEntry task_entry) const { 1923 if (task_entry.is_array_slice()) { 1924 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1925 return; 1926 } 1927 guarantee(oopDesc::is_oop(task_entry.obj()), 1928 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1929 p2i(task_entry.obj()), _phase, _info); 1930 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1931 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1932 p2i(task_entry.obj()), _phase, _info); 1933 } 1934 }; 1935 1936 void G1ConcurrentMark::verify_no_cset_oops() { 1937 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1938 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 1939 return; 1940 } 1941 1942 // Verify entries on the global mark stack 1943 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 1944 1945 // Verify entries on the task queues 1946 for (uint i = 0; i < _max_worker_id; ++i) { 1947 G1CMTaskQueue* queue = _task_queues->queue(i); 1948 queue->iterate(VerifyNoCSetOops("Queue", i)); 1949 } 1950 1951 // Verify the global finger 1952 HeapWord* global_finger = finger(); 1953 if (global_finger != NULL && global_finger < _heap_end) { 1954 // Since we always iterate over all regions, we might get a NULL HeapRegion 1955 // here. 1956 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 1957 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 1958 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 1959 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 1960 } 1961 1962 // Verify the task fingers 1963 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 1964 for (uint i = 0; i < parallel_marking_threads(); ++i) { 1965 G1CMTask* task = _tasks[i]; 1966 HeapWord* task_finger = task->finger(); 1967 if (task_finger != NULL && task_finger < _heap_end) { 1968 // See above note on the global finger verification. 
1969 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 1970 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 1971 !task_hr->in_collection_set(), 1972 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 1973 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 1974 } 1975 } 1976 } 1977 #endif // PRODUCT 1978 void G1ConcurrentMark::create_live_data() { 1979 _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap); 1980 } 1981 1982 void G1ConcurrentMark::finalize_live_data() { 1983 _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap); 1984 } 1985 1986 void G1ConcurrentMark::verify_live_data() { 1987 _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap); 1988 } 1989 1990 void G1ConcurrentMark::clear_live_data(WorkGang* workers) { 1991 _g1h->g1_rem_set()->clear_card_live_data(workers); 1992 } 1993 1994 #ifdef ASSERT 1995 void G1ConcurrentMark::verify_live_data_clear() { 1996 _g1h->g1_rem_set()->verify_card_live_data_is_clear(); 1997 } 1998 #endif 1999 2000 void G1ConcurrentMark::print_stats() { 2001 if (!log_is_enabled(Debug, gc, stats)) { 2002 return; 2003 } 2004 log_debug(gc, stats)("---------------------------------------------------------------------"); 2005 for (size_t i = 0; i < _active_tasks; ++i) { 2006 _tasks[i]->print_stats(); 2007 log_debug(gc, stats)("---------------------------------------------------------------------"); 2008 } 2009 } 2010 2011 void G1ConcurrentMark::abort() { 2012 if (!cmThread()->during_cycle() || _has_aborted) { 2013 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2014 return; 2015 } 2016 2017 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2018 // concurrent bitmap clearing. 2019 { 2020 GCTraceTime(Debug, gc)("Clear Next Bitmap"); 2021 clear_bitmap(_nextMarkBitMap, _g1h->workers(), false); 2022 } 2023 // Note we cannot clear the previous marking bitmap here 2024 // since VerifyDuringGC verifies the objects marked during 2025 // a full GC against the previous bitmap. 2026 2027 { 2028 GCTraceTime(Debug, gc)("Clear Live Data"); 2029 clear_live_data(_g1h->workers()); 2030 } 2031 DEBUG_ONLY({ 2032 GCTraceTime(Debug, gc)("Verify Live Data Clear"); 2033 verify_live_data_clear(); 2034 }) 2035 // Empty mark stack 2036 reset_marking_state(); 2037 for (uint i = 0; i < _max_worker_id; ++i) { 2038 _tasks[i]->clear_region_fields(); 2039 } 2040 _first_overflow_barrier_sync.abort(); 2041 _second_overflow_barrier_sync.abort(); 2042 _has_aborted = true; 2043 2044 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2045 satb_mq_set.abandon_partial_marking(); 2046 // This can be called either during or outside marking; we'll read 2047 // the expected_active value from the SATB queue set. 2048 satb_mq_set.set_active_all_threads( 2049 false, /* new active value */ 2050 satb_mq_set.is_active() /* expected_active */); 2051 } 2052 2053 static void print_ms_time_info(const char* prefix, const char* name, 2054 NumberSeq& ns) { 2055 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2056 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2057 if (ns.num() > 0) { 2058 log_trace(gc, marking)("%s [std.
dev = %8.2f ms, max = %8.2f ms]", 2059 prefix, ns.sd(), ns.maximum()); 2060 } 2061 } 2062 2063 void G1ConcurrentMark::print_summary_info() { 2064 Log(gc, marking) log; 2065 if (!log.is_trace()) { 2066 return; 2067 } 2068 2069 log.trace(" Concurrent marking:"); 2070 print_ms_time_info(" ", "init marks", _init_times); 2071 print_ms_time_info(" ", "remarks", _remark_times); 2072 { 2073 print_ms_time_info(" ", "final marks", _remark_mark_times); 2074 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2075 2076 } 2077 print_ms_time_info(" ", "cleanups", _cleanup_times); 2078 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2079 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2080 if (G1ScrubRemSets) { 2081 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2082 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2083 } 2084 log.trace(" Total stop_world time = %8.2f s.", 2085 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2086 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2087 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2088 } 2089 2090 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2091 _parallel_workers->print_worker_threads_on(st); 2092 } 2093 2094 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2095 _parallel_workers->threads_do(tc); 2096 } 2097 2098 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2099 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2100 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2101 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2102 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2103 } 2104 2105 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2106 ReferenceProcessor* result = g1h->ref_processor_cm(); 2107 assert(result != NULL, "CM reference processor should not be NULL"); 2108 return result; 2109 } 2110 2111 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2112 G1ConcurrentMark* cm, 2113 G1CMTask* task) 2114 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2115 _g1h(g1h), _cm(cm), _task(task) 2116 { } 2117 2118 void G1CMTask::setup_for_region(HeapRegion* hr) { 2119 assert(hr != NULL, 2120 "claim_region() should have filtered out NULL regions"); 2121 _curr_region = hr; 2122 _finger = hr->bottom(); 2123 update_region_limit(); 2124 } 2125 2126 void G1CMTask::update_region_limit() { 2127 HeapRegion* hr = _curr_region; 2128 HeapWord* bottom = hr->bottom(); 2129 HeapWord* limit = hr->next_top_at_mark_start(); 2130 2131 if (limit == bottom) { 2132 // The region was collected underneath our feet. 2133 // We set the finger to bottom to ensure that the bitmap 2134 // iteration that follows will not do anything. 2135 // (this is not a condition that holds when we set the region up, 2136 // as the region is not supposed to be empty in the first place) 2137 _finger = bottom; 2138 } else if (limit >= _region_limit) { 2139 assert(limit >= _finger, "peace of mind"); 2140 } else { 2141 assert(limit < _region_limit, "only way to get here"); 2142 // This can happen under some pretty unusual circumstances. An 2143 // evacuation pause empties the region underneath our feet (NTAMS 2144 // at bottom).
We then do some allocation in the region (NTAMS 2145 // stays at bottom), followed by the region being used as a GC 2146 // alloc region (NTAMS will move to top() and the objects 2147 // originally below it will be grayed). All objects now marked in 2148 // the region are explicitly grayed, if below the global finger, 2149 // and in fact we do not need to scan anything else. So, we simply 2150 // set _finger to be limit to ensure that the bitmap iteration 2151 // doesn't do anything. 2152 _finger = limit; 2153 } 2154 2155 _region_limit = limit; 2156 } 2157 2158 void G1CMTask::giveup_current_region() { 2159 assert(_curr_region != NULL, "invariant"); 2160 clear_region_fields(); 2161 } 2162 2163 void G1CMTask::clear_region_fields() { 2164 // Values for these three fields that indicate that we're not 2165 // holding on to a region. 2166 _curr_region = NULL; 2167 _finger = NULL; 2168 _region_limit = NULL; 2169 } 2170 2171 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 2172 if (cm_oop_closure == NULL) { 2173 assert(_cm_oop_closure != NULL, "invariant"); 2174 } else { 2175 assert(_cm_oop_closure == NULL, "invariant"); 2176 } 2177 _cm_oop_closure = cm_oop_closure; 2178 } 2179 2180 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) { 2181 guarantee(nextMarkBitMap != NULL, "invariant"); 2182 _nextMarkBitMap = nextMarkBitMap; 2183 clear_region_fields(); 2184 2185 _calls = 0; 2186 _elapsed_time_ms = 0.0; 2187 _termination_time_ms = 0.0; 2188 _termination_start_time_ms = 0.0; 2189 } 2190 2191 bool G1CMTask::should_exit_termination() { 2192 regular_clock_call(); 2193 // This is called when we are in the termination protocol. We should 2194 // quit if, for some reason, this task wants to abort or the global 2195 // stack is not empty (this means that we can get work from it). 2196 return !_cm->mark_stack_empty() || has_aborted(); 2197 } 2198 2199 void G1CMTask::reached_limit() { 2200 assert(_words_scanned >= _words_scanned_limit || 2201 _refs_reached >= _refs_reached_limit , 2202 "shouldn't have been called otherwise"); 2203 regular_clock_call(); 2204 } 2205 2206 void G1CMTask::regular_clock_call() { 2207 if (has_aborted()) return; 2208 2209 // First, we need to recalculate the words scanned and refs reached 2210 // limits for the next clock call. 2211 recalculate_limits(); 2212 2213 // During the regular clock call we do the following: 2214 2215 // (1) If an overflow has been flagged, then we abort. 2216 if (_cm->has_overflown()) { 2217 set_has_aborted(); 2218 return; 2219 } 2220 2221 // If we are not concurrent (i.e. we're doing remark) we don't need 2222 // to check anything else. The other steps are only needed during 2223 // the concurrent marking phase. 2224 if (!concurrent()) return; 2225 2226 // (2) If marking has been aborted for Full GC, then we also abort. 2227 if (_cm->has_aborted()) { 2228 set_has_aborted(); 2229 return; 2230 } 2231 2232 double curr_time_ms = os::elapsedVTime() * 1000.0; 2233 2234 // (3) We check whether we should yield. If we have to, then we abort. 2235 if (SuspendibleThreadSet::should_yield()) { 2236 // We should yield. To do this we abort the task. The caller is 2237 // responsible for yielding. 2238 set_has_aborted(); 2239 return; 2240 } 2241 2242 // (4) We check whether we've reached our time quota. If we have, 2243 // then we abort.
2244 double elapsed_time_ms = curr_time_ms - _start_time_ms; 2245 if (elapsed_time_ms > _time_target_ms) { 2246 set_has_aborted(); 2247 _has_timed_out = true; 2248 return; 2249 } 2250 2251 // (5) Finally, we check whether there are enough completed SATB 2252 // buffers available for processing. If there are, we abort. 2253 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2254 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2255 // we do need to process SATB buffers; we'll abort and restart 2256 // the marking task to do so 2257 set_has_aborted(); 2258 return; 2259 } 2260 } 2261 2262 void G1CMTask::recalculate_limits() { 2263 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2264 _words_scanned_limit = _real_words_scanned_limit; 2265 2266 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2267 _refs_reached_limit = _real_refs_reached_limit; 2268 } 2269 2270 void G1CMTask::decrease_limits() { 2271 // This is called when we believe that we're going to do an infrequent 2272 // operation which will increase the per-byte scanned cost (i.e. move 2273 // entries to/from the global stack). It basically tries to decrease the 2274 // scanning limit so that the clock is called earlier. 2275 2276 _words_scanned_limit = _real_words_scanned_limit - 2277 3 * words_scanned_period / 4; 2278 _refs_reached_limit = _real_refs_reached_limit - 2279 3 * refs_reached_period / 4; 2280 } 2281 2282 void G1CMTask::move_entries_to_global_stack() { 2283 // Local array where we'll store the entries that will be popped 2284 // from the local queue. 2285 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2286 2287 size_t n = 0; 2288 G1TaskQueueEntry task_entry; 2289 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) { 2290 buffer[n] = task_entry; 2291 ++n; 2292 } 2293 if (n < G1CMMarkStack::EntriesPerChunk) { 2294 buffer[n] = G1TaskQueueEntry(); 2295 } 2296 2297 if (n > 0) { 2298 if (!_cm->mark_stack_push(buffer)) { 2299 set_has_aborted(); 2300 } 2301 } 2302 2303 // This operation was quite expensive, so decrease the limits. 2304 decrease_limits(); 2305 } 2306 2307 bool G1CMTask::get_entries_from_global_stack() { 2308 // Local array where we'll store the entries that will be popped 2309 // from the global stack. 2310 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2311 2312 if (!_cm->mark_stack_pop(buffer)) { 2313 return false; 2314 } 2315 2316 // We did actually pop at least one entry. 2317 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) { 2318 G1TaskQueueEntry task_entry = buffer[i]; 2319 if (task_entry.is_null()) { 2320 break; 2321 } 2322 assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj())); 2323 bool success = _task_queue->push(task_entry); 2324 // We only call this when the local queue is empty or under a 2325 // given target limit. So, we do not expect this push to fail. 2326 assert(success, "invariant"); 2327 } 2328 2329 // This operation was quite expensive, so decrease the limits. 2330 decrease_limits(); 2331 return true; 2332 } 2333 2334 void G1CMTask::drain_local_queue(bool partially) { 2335 if (has_aborted()) { 2336 return; 2337 } 2338 2339 // Decide what the target size is, depending on whether we're going to 2340 // drain it partially (so that other tasks can steal if they run out 2341 // of things to do) or totally (at the very end).
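// For example (illustrative numbers only; max_elems() depends on the task
// queue size and GCDrainStackTargetSize is a tunable flag): with
// max_elems() == 16384 and GCDrainStackTargetSize == 64, a partial drain
// scans entries until at most MIN2(16384 / 3, 64) == 64 remain queued,
// deliberately leaving some behind for other tasks to steal.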
2342 size_t target_size; 2343 if (partially) { 2344 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 2345 } else { 2346 target_size = 0; 2347 } 2348 2349 if (_task_queue->size() > target_size) { 2350 G1TaskQueueEntry entry; 2351 bool ret = _task_queue->pop_local(entry); 2352 while (ret) { 2353 scan_task_entry(entry); 2354 if (_task_queue->size() <= target_size || has_aborted()) { 2355 ret = false; 2356 } else { 2357 ret = _task_queue->pop_local(entry); 2358 } 2359 } 2360 } 2361 } 2362 2363 void G1CMTask::drain_global_stack(bool partially) { 2364 if (has_aborted()) return; 2365 2366 // We have a policy to drain the local queue before we attempt to 2367 // drain the global stack. 2368 assert(partially || _task_queue->size() == 0, "invariant"); 2369 2370 // Decide what the target size is, depending on whether we're going to 2371 // drain it partially (so that other tasks can steal if they run out 2372 // of things to do) or totally (at the very end). 2373 // Notice that when draining the global mark stack partially, due to the raciness 2374 // of the mark stack size update we might in fact drop below the target. But, 2375 // this is not a problem. 2376 // In case of total draining, we simply process until the global mark stack is 2377 // totally empty, disregarding the size counter. 2378 if (partially) { 2379 size_t const target_size = _cm->partial_mark_stack_size_target(); 2380 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 2381 if (get_entries_from_global_stack()) { 2382 drain_local_queue(partially); 2383 } 2384 } 2385 } else { 2386 while (!has_aborted() && get_entries_from_global_stack()) { 2387 drain_local_queue(partially); 2388 } 2389 } 2390 } 2391 2392 // The SATB queue has several assumptions about whether to call the par or 2393 // non-par versions of the methods. This is why some of the code is 2394 // replicated. We should really get rid of the single-threaded version 2395 // of the code to simplify things. 2396 void G1CMTask::drain_satb_buffers() { 2397 if (has_aborted()) return; 2398 2399 // We set this so that the regular clock knows that we're in the 2400 // middle of draining buffers and doesn't set the abort flag when it 2401 // notices that SATB buffers are available for draining. It'd be 2402 // very counterproductive if it did that. :-) 2403 _draining_satb_buffers = true; 2404 2405 G1CMSATBBufferClosure satb_cl(this, _g1h); 2406 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2407 2408 // This keeps claiming and applying the closure to completed buffers 2409 // until we run out of buffers or we need to abort.
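// Note that each successful buffer application is followed by
// regular_clock_call(), so a task working through a long backlog of
// buffers can still observe an overflow, a yield request or an expired
// time quota, set has_aborted(), and leave this loop promptly.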
2410 while (!has_aborted() && 2411 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2412 regular_clock_call(); 2413 } 2414 2415 _draining_satb_buffers = false; 2416 2417 assert(has_aborted() || 2418 concurrent() || 2419 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2420 2421 // again, this was a potentially expensive operation, decrease the 2422 // limits to get the regular clock call early 2423 decrease_limits(); 2424 } 2425 2426 void G1CMTask::print_stats() { 2427 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 2428 _worker_id, _calls); 2429 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2430 _elapsed_time_ms, _termination_time_ms); 2431 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 2432 _step_times_ms.num(), _step_times_ms.avg(), 2433 _step_times_ms.sd()); 2434 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 2435 _step_times_ms.maximum(), _step_times_ms.sum()); 2436 } 2437 2438 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2439 return _task_queues->steal(worker_id, hash_seed, task_entry); 2440 } 2441 2442 /***************************************************************************** 2443 2444 The do_marking_step(time_target_ms, ...) method is the building 2445 block of the parallel marking framework. It can be called in parallel 2446 with other invocations of do_marking_step() on different tasks 2447 (but only one per task, obviously) and concurrently with the 2448 mutator threads, or during remark, hence it eliminates the need 2449 for two versions of the code. When called during remark, it will 2450 pick up from where the task left off during the concurrent marking 2451 phase. Interestingly, tasks are also claimable during evacuation 2452 pauses, since do_marking_step() ensures that it aborts before 2453 it needs to yield. 2454 2455 The data structures that it uses to do marking work are the 2456 following: 2457 2458 (1) Marking Bitmap. If there are gray objects that appear only 2459 on the bitmap (this happens either when dealing with an overflow 2460 or when the initial marking phase has simply marked the roots 2461 and didn't push them on the stack), then tasks claim heap 2462 regions whose bitmap they then scan to find gray objects. A 2463 global finger indicates where the end of the last claimed region 2464 is. A local finger indicates how far into the region a task has 2465 scanned. The two fingers are used to determine how to gray an 2466 object (i.e. whether simply marking it is OK, as it will be 2467 visited by a task in the future, or whether it also needs to be 2468 pushed on a stack). 2469 2470 (2) Local Queue. The local queue of the task which is accessed 2471 reasonably efficiently by the task. Other tasks can steal from 2472 it when they run out of work. Throughout the marking phase, a 2473 task attempts to keep its local queue short but not totally 2474 empty, so that entries are available for stealing by other 2475 tasks. Only when there is no more work does a task totally 2476 drain its local queue. 2477 2478 (3) Global Mark Stack. This handles local queue overflow. During 2479 marking only sets of entries are moved between it and the local 2480 queues, as access to it requires a mutex and finer-grained 2481 interaction with it, which might cause contention. If it 2482 overflows, then the marking phase should restart and iterate 2483 over the bitmap to identify gray objects.
Throughout the marking 2484 phase, tasks attempt to keep the global mark stack at a small 2485 length but not totally empty, so that entries are available for 2486 popping by other tasks. Only when there is no more work do tasks 2487 totally drain the global mark stack. 2488 2489 (4) SATB Buffer Queue. This is where completed SATB buffers are 2490 made available. Buffers are regularly removed from this queue 2491 and scanned for roots, so that the queue doesn't get too 2492 long. During remark, all completed buffers are processed, as 2493 well as the filled-in parts of any uncompleted buffers. 2494 2495 The do_marking_step() method tries to abort when the time target 2496 has been reached. There are a few other cases when the 2497 do_marking_step() method also aborts: 2498 2499 (1) When the marking phase has been aborted (after a Full GC). 2500 2501 (2) When a global overflow (on the global stack) has been 2502 triggered. Before the task aborts, it will actually sync up with 2503 the other tasks to ensure that all the marking data structures 2504 (local queues, stacks, fingers etc.) are re-initialized so that 2505 when do_marking_step() completes, the marking phase can 2506 immediately restart. 2507 2508 (3) When enough completed SATB buffers are available. The 2509 do_marking_step() method only tries to drain SATB buffers right 2510 at the beginning. So, if enough buffers are available, the 2511 marking step aborts and the SATB buffers are processed at 2512 the beginning of the next invocation. 2513 2514 (4) To yield. When we have to yield, we abort and yield 2515 right at the end of do_marking_step(). This saves us from a lot 2516 of hassle as, by yielding, we might allow a Full GC. If this 2517 happens then objects will be compacted underneath our feet, the 2518 heap might shrink, etc. We save checking for this by just 2519 aborting and doing the yield right at the end. 2520 2521 From the above it follows that the do_marking_step() method should 2522 be called in a loop (or, otherwise, regularly) until it completes. 2523 2524 If a marking step completes without its has_aborted() flag being 2525 true, it means it has completed the current marking phase (and 2526 also all other marking tasks have done so and have all synced up). 2527 2528 A method called regular_clock_call() is invoked "regularly" (in 2529 sub-ms intervals) throughout marking. It is this clock method that 2530 checks all the abort conditions which were mentioned above and 2531 decides when the task should abort. A work-based scheme is used to 2532 trigger this clock method: when the number of object words the 2533 marking phase has scanned or the number of references the marking 2534 phase has visited reaches a given limit. Additional invocations of 2535 the clock method have been planted in a few other strategic places 2536 too. The initial reason for the clock method was to avoid calling 2537 vtime too regularly, as it is quite expensive. So, once it was in 2538 place, it was natural to piggy-back all the other conditions on it 2539 too and not constantly check them throughout the code. 2540 2541 If do_termination is true then do_marking_step will enter its 2542 termination protocol. 2543 2544 The value of is_serial must be true when do_marking_step is being 2545 called serially (i.e. by the VMThread) and do_marking_step should 2546 skip any synchronization in the termination and overflow code. 2547 Examples include the serial remark code and the serial reference 2548 processing closures.
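   For example (an illustrative shape only, mirroring the serial drain
   closure earlier in this file):

     task->do_marking_step(1000000000.0 /* something very large */,
                           true /* do_termination */,
                           true /* is_serial */);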
2549 2550 The value of is_serial must be false when do_marking_step is 2551 being called by any of the worker threads in a work gang. 2552 Examples include the concurrent marking code (CMMarkingTask), 2553 the MT remark code, and the MT reference processing closures. 2554 2555 *****************************************************************************/ 2556 2557 void G1CMTask::do_marking_step(double time_target_ms, 2558 bool do_termination, 2559 bool is_serial) { 2560 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2561 assert(concurrent() == _cm->concurrent(), "they should be the same"); 2562 2563 G1Policy* g1_policy = _g1h->g1_policy(); 2564 assert(_task_queues != NULL, "invariant"); 2565 assert(_task_queue != NULL, "invariant"); 2566 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant"); 2567 2568 assert(!_claimed, 2569 "only one thread should claim this task at any one time"); 2570 2571 // OK, this doesn't safeguard against all possible scenarios, as it is 2572 // possible for two threads to set the _claimed flag at the same 2573 // time. But it is only for debugging purposes anyway and it will 2574 // catch most problems. 2575 _claimed = true; 2576 2577 _start_time_ms = os::elapsedVTime() * 1000.0; 2578 2579 // If do_stealing is true then do_marking_step will attempt to 2580 // steal work from the other G1CMTasks. It only makes sense to 2581 // enable stealing when the termination protocol is enabled 2582 // and do_marking_step() is not being called serially. 2583 bool do_stealing = do_termination && !is_serial; 2584 2585 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2586 _time_target_ms = time_target_ms - diff_prediction_ms; 2587 2588 // set up the variables that are used in the work-based scheme to 2589 // call the regular clock method 2590 _words_scanned = 0; 2591 _refs_reached = 0; 2592 recalculate_limits(); 2593 2594 // clear all flags 2595 clear_has_aborted(); 2596 _has_timed_out = false; 2597 _draining_satb_buffers = false; 2598 2599 ++_calls; 2600 2601 // Set up the bitmap and oop closures. Anything that uses them is 2602 // eventually called from this method, so it is OK to allocate these 2603 // statically. 2604 G1CMBitMapClosure bitmap_closure(this, _cm); 2605 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 2606 set_cm_oop_closure(&cm_oop_closure); 2607 2608 if (_cm->has_overflown()) { 2609 // This can happen if the mark stack overflows during a GC pause 2610 // and this task, after a yield point, restarts. We have to abort 2611 // as we need to get into the overflow protocol which happens 2612 // right at the end of this task. 2613 set_has_aborted(); 2614 } 2615 2616 // First drain any available SATB buffers. After this, we will not 2617 // look at SATB buffers before the next invocation of this method. 2618 // If enough completed SATB buffers are queued up, the regular clock 2619 // will abort this task so that it restarts. 2620 drain_satb_buffers(); 2621 // ...then partially drain the local queue and the global stack 2622 drain_local_queue(true); 2623 drain_global_stack(true); 2624 2625 do { 2626 if (!has_aborted() && _curr_region != NULL) { 2627 // This means that we're already holding on to a region. 2628 assert(_finger != NULL, "if region is not NULL, then the finger " 2629 "should not be NULL either"); 2630 2631 // We might have restarted this task after an evacuation pause 2632 // which might have evacuated the region we're holding on to 2633 // underneath our feet.
Let's read its limit again to make sure 2634 // that we do not iterate over a region of the heap that 2635 // contains garbage (update_region_limit() will also move 2636 // _finger to the start of the region if it is found empty). 2637 update_region_limit(); 2638 // We will start from _finger not from the start of the region, 2639 // as we might be restarting this task after aborting half-way 2640 // through scanning this region. In this case, _finger points to 2641 // the address where we last found a marked object. If this is a 2642 // fresh region, _finger points to start(). 2643 MemRegion mr = MemRegion(_finger, _region_limit); 2644 2645 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2646 "humongous regions should go around loop once only"); 2647 2648 // Some special cases: 2649 // If the memory region is empty, we can just give up the region. 2650 // If the current region is humongous then we only need to check 2651 // the bitmap for the bit associated with the start of the object, 2652 // scan the object if it's live, and give up the region. 2653 // Otherwise, let's iterate over the bitmap of the part of the region 2654 // that is left. 2655 // If the iteration is successful, give up the region. 2656 if (mr.is_empty()) { 2657 giveup_current_region(); 2658 regular_clock_call(); 2659 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2660 if (_nextMarkBitMap->is_marked(mr.start())) { 2661 // The object is marked - apply the closure 2662 bitmap_closure.do_addr(mr.start()); 2663 } 2664 // Even if this task aborted while scanning the humongous object 2665 // we can (and should) give up the current region. 2666 giveup_current_region(); 2667 regular_clock_call(); 2668 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 2669 giveup_current_region(); 2670 regular_clock_call(); 2671 } else { 2672 assert(has_aborted(), "currently the only way to do so"); 2673 // The only way to abort the bitmap iteration is to return 2674 // false from the do_addr() method. However, inside the 2675 // do_addr() method we move the _finger to point to the 2676 // object currently being looked at. So, if we bail out, we 2677 // have definitely set _finger to something non-null. 2678 assert(_finger != NULL, "invariant"); 2679 2680 // Region iteration was actually aborted. So now _finger 2681 // points to the address of the object we last scanned. If we 2682 // leave it there, when we restart this task, we will rescan 2683 // the object. It is easy to avoid this. We move the finger by 2684 // enough to point to the next possible object header. 2685 assert(_finger < _region_limit, "invariant"); 2686 HeapWord* const new_finger = _finger + ((oop)_finger)->size(); 2687 // Check if bitmap iteration was aborted while scanning the last object 2688 if (new_finger >= _region_limit) { 2689 giveup_current_region(); 2690 } else { 2691 move_finger_to(new_finger); 2692 } 2693 } 2694 } 2695 // At this point we have either completed iterating over the 2696 // region we were holding on to, or we have aborted. 2697 2698 // We then partially drain the local queue and the global stack. 2699 // (Do we really need this?) 2700 drain_local_queue(true); 2701 drain_global_stack(true); 2702 2703 // Read the note on the claim_region() method on why it might 2704 // return NULL with potentially more regions available for 2705 // claiming and why we have to check out_of_regions() to determine 2706 // whether we're done or not.
2707 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2708 // We are going to try to claim a new region. We should have 2709 // given up on the previous one. 2710 // Separated the asserts so that we know which one fires. 2711 assert(_curr_region == NULL, "invariant"); 2712 assert(_finger == NULL, "invariant"); 2713 assert(_region_limit == NULL, "invariant"); 2714 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2715 if (claimed_region != NULL) { 2716 // Yes, we managed to claim one 2717 setup_for_region(claimed_region); 2718 assert(_curr_region == claimed_region, "invariant"); 2719 } 2720 // It is important to call the regular clock here. It might take 2721 // a while to claim a region if, for example, we hit a large 2722 // block of empty regions. So we need to call the regular clock 2723 // method once round the loop to make sure it's called 2724 // frequently enough. 2725 regular_clock_call(); 2726 } 2727 2728 if (!has_aborted() && _curr_region == NULL) { 2729 assert(_cm->out_of_regions(), 2730 "at this point we should be out of regions"); 2731 } 2732 } while ( _curr_region != NULL && !has_aborted()); 2733 2734 if (!has_aborted()) { 2735 // We cannot check whether the global stack is empty, since other 2736 // tasks might be pushing objects to it concurrently. 2737 assert(_cm->out_of_regions(), 2738 "at this point we should be out of regions"); 2739 // Try to reduce the number of available SATB buffers so that 2740 // remark has less work to do. 2741 drain_satb_buffers(); 2742 } 2743 2744 // Since we've done everything else, we can now totally drain the 2745 // local queue and global stack. 2746 drain_local_queue(false); 2747 drain_global_stack(false); 2748 2749 // Attempt at work stealing from other tasks' queues. 2750 if (do_stealing && !has_aborted()) { 2751 // We have not aborted. This means that we have finished all that 2752 // we could. Let's try to do some stealing... 2753 2754 // We cannot check whether the global stack is empty, since other 2755 // tasks might be pushing objects to it concurrently. 2756 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2757 "only way to reach here"); 2758 while (!has_aborted()) { 2759 G1TaskQueueEntry entry; 2760 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2761 scan_task_entry(entry); 2762 2763 // And since we're towards the end, let's totally drain the 2764 // local queue and global stack. 2765 drain_local_queue(false); 2766 drain_global_stack(false); 2767 } else { 2768 break; 2769 } 2770 } 2771 } 2772 2773 // We still haven't aborted. Now, let's try to get into the 2774 // termination protocol. 2775 if (do_termination && !has_aborted()) { 2776 // We cannot check whether the global stack is empty, since other 2777 // tasks might be concurrently pushing objects on it. 2778 // Separated the asserts so that we know which one fires. 2779 assert(_cm->out_of_regions(), "only way to reach here"); 2780 assert(_task_queue->size() == 0, "only way to reach here"); 2781 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2782 2783 // The G1CMTask class also extends the TerminatorTerminator class, 2784 // hence its should_exit_termination() method will also decide 2785 // whether to exit the termination protocol or not.
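// (Sketch of the interplay, assuming the usual terminator behavior: while
// this task is parked in offer_termination(), the terminator periodically
// calls back into should_exit_termination() above, which re-arms the clock
// via regular_clock_call() and re-checks the global mark stack, so a
// parked task can wake up either to abort or to pick up new work.)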
2786 bool finished = (is_serial || 2787 _cm->terminator()->offer_termination(this)); 2788 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2789 _termination_time_ms += 2790 termination_end_time_ms - _termination_start_time_ms; 2791 2792 if (finished) { 2793 // We're all done. 2794 2795 if (_worker_id == 0) { 2796 // let's allow task 0 to do this 2797 if (concurrent()) { 2798 assert(_cm->concurrent_marking_in_progress(), "invariant"); 2799 // we need to set this to false before the next 2800 // safepoint. This way we ensure that the marking phase 2801 // doesn't observe any more heap expansions. 2802 _cm->clear_concurrent_marking_in_progress(); 2803 } 2804 } 2805 2806 // We can now guarantee that the global stack is empty, since 2807 // all other tasks have finished. We separated the guarantees so 2808 // that, if a condition is false, we can immediately find out 2809 // which one. 2810 guarantee(_cm->out_of_regions(), "only way to reach here"); 2811 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2812 guarantee(_task_queue->size() == 0, "only way to reach here"); 2813 guarantee(!_cm->has_overflown(), "only way to reach here"); 2814 } else { 2815 // Apparently there's more work to do. Let's abort this task. The 2816 // caller will restart it and we can hopefully find more things to do. 2817 set_has_aborted(); 2818 } 2819 } 2820 2821 // Mainly for debugging purposes to make sure that a pointer to the 2822 // closure which was statically allocated in this frame doesn't 2823 // escape it by accident. 2824 set_cm_oop_closure(NULL); 2825 double end_time_ms = os::elapsedVTime() * 1000.0; 2826 double elapsed_time_ms = end_time_ms - _start_time_ms; 2827 // Update the step history. 2828 _step_times_ms.add(elapsed_time_ms); 2829 2830 if (has_aborted()) { 2831 // The task was aborted for some reason. 2832 if (_has_timed_out) { 2833 double diff_ms = elapsed_time_ms - _time_target_ms; 2834 // Keep statistics of how well we did with respect to hitting 2835 // our target only if we actually timed out (if we aborted for 2836 // other reasons, then the results might get skewed). 2837 _marking_step_diffs_ms.add(diff_ms); 2838 } 2839 2840 if (_cm->has_overflown()) { 2841 // This is the interesting one. We aborted because a global 2842 // overflow was raised. This means we have to restart the 2843 // marking phase and start iterating over regions. However, in 2844 // order to do this we have to make sure that all tasks stop 2845 // what they are doing and re-initialize in a safe manner. We 2846 // will achieve this with the use of two barrier sync points. 2847 2848 if (!is_serial) { 2849 // We only need to enter the sync barrier if being called 2850 // from a parallel context 2851 _cm->enter_first_sync_barrier(_worker_id); 2852 2853 // When we exit this sync barrier we know that all tasks have 2854 // stopped doing marking work. So, it's now safe to 2855 // re-initialize our data structures. At the end of this method, 2856 // task 0 will clear the global data structures. 2857 } 2858 2859 // We clear the local state of this task... 2860 clear_region_fields(); 2861 2862 if (!is_serial) { 2863 // ...and enter the second barrier. 2864 _cm->enter_second_sync_barrier(_worker_id); 2865 } 2866 // At this point, if we're in the concurrent phase of 2867 // marking, everything has been re-initialized and we're 2868 // ready to restart.
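// To summarize the overflow path above as a sketch (parallel case):
//
//   _cm->enter_first_sync_barrier(_worker_id);   // all tasks stop marking
//   clear_region_fields();                       // local state reset; task 0
//                                                // also resets the global
//                                                // data structures
//   _cm->enter_second_sync_barrier(_worker_id);  // everyone ready to restart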
2869 } 2870 } 2871 2872 _claimed = false; 2873 } 2874 2875 G1CMTask::G1CMTask(uint worker_id, 2876 G1ConcurrentMark* cm, 2877 G1CMTaskQueue* task_queue, 2878 G1CMTaskQueueSet* task_queues) 2879 : _g1h(G1CollectedHeap::heap()), 2880 _worker_id(worker_id), _cm(cm), 2881 _objArray_processor(this), 2882 _claimed(false), 2883 _nextMarkBitMap(NULL), _hash_seed(17), 2884 _task_queue(task_queue), 2885 _task_queues(task_queues), 2886 _cm_oop_closure(NULL) { 2887 guarantee(task_queue != NULL, "invariant"); 2888 guarantee(task_queues != NULL, "invariant"); 2889 2890 _marking_step_diffs_ms.add(0.5); 2891 } 2892 2893 // These are formatting macros that are used below to ensure 2894 // consistent formatting. The *_H_* versions are used to format the 2895 // header for a particular value and they should be kept consistent 2896 // with the corresponding macro. Also note that most of the macros add 2897 // the necessary white space (as a prefix) which makes them a bit 2898 // easier to compose. 2899 2900 // All the output lines are prefixed with this string to be able to 2901 // identify them easily in a large log file. 2902 #define G1PPRL_LINE_PREFIX "###" 2903 2904 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 2905 #ifdef _LP64 2906 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 2907 #else // _LP64 2908 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 2909 #endif // _LP64 2910 2911 // For per-region info 2912 #define G1PPRL_TYPE_FORMAT " %-4s" 2913 #define G1PPRL_TYPE_H_FORMAT " %4s" 2914 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 2915 #define G1PPRL_BYTE_H_FORMAT " %9s" 2916 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 2917 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 2918 2919 // For summary info 2920 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 2921 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 2922 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 2923 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 2924 2925 G1PrintRegionLivenessInfoClosure:: 2926 G1PrintRegionLivenessInfoClosure(const char* phase_name) 2927 : _total_used_bytes(0), _total_capacity_bytes(0), 2928 _total_prev_live_bytes(0), _total_next_live_bytes(0), 2929 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 2930 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2931 MemRegion g1_reserved = g1h->g1_reserved(); 2932 double now = os::elapsedTime(); 2933 2934 // Print the header of the output. 
2935 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 2936 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 2937 G1PPRL_SUM_ADDR_FORMAT("reserved") 2938 G1PPRL_SUM_BYTE_FORMAT("region-size"), 2939 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 2940 HeapRegion::GrainBytes); 2941 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2942 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2943 G1PPRL_TYPE_H_FORMAT 2944 G1PPRL_ADDR_BASE_H_FORMAT 2945 G1PPRL_BYTE_H_FORMAT 2946 G1PPRL_BYTE_H_FORMAT 2947 G1PPRL_BYTE_H_FORMAT 2948 G1PPRL_DOUBLE_H_FORMAT 2949 G1PPRL_BYTE_H_FORMAT 2950 G1PPRL_BYTE_H_FORMAT, 2951 "type", "address-range", 2952 "used", "prev-live", "next-live", "gc-eff", 2953 "remset", "code-roots"); 2954 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2955 G1PPRL_TYPE_H_FORMAT 2956 G1PPRL_ADDR_BASE_H_FORMAT 2957 G1PPRL_BYTE_H_FORMAT 2958 G1PPRL_BYTE_H_FORMAT 2959 G1PPRL_BYTE_H_FORMAT 2960 G1PPRL_DOUBLE_H_FORMAT 2961 G1PPRL_BYTE_H_FORMAT 2962 G1PPRL_BYTE_H_FORMAT, 2963 "", "", 2964 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 2965 "(bytes)", "(bytes)"); 2966 } 2967 2968 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 2969 const char* type = r->get_type_str(); 2970 HeapWord* bottom = r->bottom(); 2971 HeapWord* end = r->end(); 2972 size_t capacity_bytes = r->capacity(); 2973 size_t used_bytes = r->used(); 2974 size_t prev_live_bytes = r->live_bytes(); 2975 size_t next_live_bytes = r->next_live_bytes(); 2976 double gc_eff = r->gc_efficiency(); 2977 size_t remset_bytes = r->rem_set()->mem_size(); 2978 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 2979 2980 _total_used_bytes += used_bytes; 2981 _total_capacity_bytes += capacity_bytes; 2982 _total_prev_live_bytes += prev_live_bytes; 2983 _total_next_live_bytes += next_live_bytes; 2984 _total_remset_bytes += remset_bytes; 2985 _total_strong_code_roots_bytes += strong_code_roots_bytes; 2986 2987 // Print a line for this particular region. 2988 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2989 G1PPRL_TYPE_FORMAT 2990 G1PPRL_ADDR_BASE_FORMAT 2991 G1PPRL_BYTE_FORMAT 2992 G1PPRL_BYTE_FORMAT 2993 G1PPRL_BYTE_FORMAT 2994 G1PPRL_DOUBLE_FORMAT 2995 G1PPRL_BYTE_FORMAT 2996 G1PPRL_BYTE_FORMAT, 2997 type, p2i(bottom), p2i(end), 2998 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 2999 remset_bytes, strong_code_roots_bytes); 3000 3001 return false; 3002 } 3003 3004 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 3005 // add static memory usages to remembered set sizes 3006 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3007 // Print the footer of the output. 3008 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3009 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3010 " SUMMARY" 3011 G1PPRL_SUM_MB_FORMAT("capacity") 3012 G1PPRL_SUM_MB_PERC_FORMAT("used") 3013 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3014 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3015 G1PPRL_SUM_MB_FORMAT("remset") 3016 G1PPRL_SUM_MB_FORMAT("code-roots"), 3017 bytes_to_mb(_total_capacity_bytes), 3018 bytes_to_mb(_total_used_bytes), 3019 perc(_total_used_bytes, _total_capacity_bytes), 3020 bytes_to_mb(_total_prev_live_bytes), 3021 perc(_total_prev_live_bytes, _total_capacity_bytes), 3022 bytes_to_mb(_total_next_live_bytes), 3023 perc(_total_next_live_bytes, _total_capacity_bytes), 3024 bytes_to_mb(_total_remset_bytes), 3025 bytes_to_mb(_total_strong_code_roots_bytes)); 3026 }
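// An illustrative, hand-composed fragment of the output produced by this
// closure (field widths compressed and all values invented; enable with
// -Xlog:gc+liveness=trace):
//
// ### PHASE Post-Marking @ 12.345
// ### HEAP  reserved: 0x00000006c0000000-0x00000007c0000000  region-size: 4194304
// ###
// ###  type          address-range           used  prev-live  next-live  gc-eff  remset  code-roots
// ###  OLD   0x00000006c0000000-0x00000006c0400000  4194304  4190992  4190992  1234.5  4096  128
// ### SUMMARY capacity: 4096.00 MB  used: 2048.00 MB / 50.00 %  ...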