1 /* 2 * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/metadataOnStackMark.hpp" 27 #include "classfile/symbolTable.hpp" 28 #include "code/codeCache.hpp" 29 #include "gc/g1/g1CollectedHeap.inline.hpp" 30 #include "gc/g1/g1CollectorState.hpp" 31 #include "gc/g1/g1ConcurrentMark.inline.hpp" 32 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp" 33 #include "gc/g1/g1HeapVerifier.hpp" 34 #include "gc/g1/g1OopClosures.inline.hpp" 35 #include "gc/g1/g1Policy.hpp" 36 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp" 37 #include "gc/g1/g1StringDedup.hpp" 38 #include "gc/g1/heapRegion.inline.hpp" 39 #include "gc/g1/heapRegionRemSet.hpp" 40 #include "gc/g1/heapRegionSet.inline.hpp" 41 #include "gc/shared/adaptiveSizePolicy.hpp" 42 #include "gc/shared/gcId.hpp" 43 #include "gc/shared/gcTimer.hpp" 44 #include "gc/shared/gcTrace.hpp" 45 #include "gc/shared/gcTraceTime.inline.hpp" 46 #include "gc/shared/genOopClosures.inline.hpp" 47 #include "gc/shared/referencePolicy.hpp" 48 #include "gc/shared/strongRootsScope.hpp" 49 #include "gc/shared/suspendibleThreadSet.hpp" 50 #include "gc/shared/taskqueue.inline.hpp" 51 #include "gc/shared/vmGCOperations.hpp" 52 #include "gc/shared/weakProcessor.hpp" 53 #include "include/jvm.h" 54 #include "logging/log.hpp" 55 #include "memory/allocation.hpp" 56 #include "memory/resourceArea.hpp" 57 #include "oops/access.inline.hpp" 58 #include "oops/oop.inline.hpp" 59 #include "runtime/atomic.hpp" 60 #include "runtime/handles.inline.hpp" 61 #include "runtime/java.hpp" 62 #include "runtime/prefetch.inline.hpp" 63 #include "services/memTracker.hpp" 64 #include "utilities/align.hpp" 65 #include "utilities/growableArray.hpp" 66 67 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) { 68 assert(addr < _cm->finger(), "invariant"); 69 assert(addr >= _task->finger(), "invariant"); 70 71 // We move that task's local finger along. 
72 _task->move_finger_to(addr); 73 74 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); 75 // we only partially drain the local queue and global stack 76 _task->drain_local_queue(true); 77 _task->drain_global_stack(true); 78 79 // if the has_aborted flag has been raised, we need to bail out of 80 // the iteration 81 return !_task->has_aborted(); 82 } 83 84 G1CMMarkStack::G1CMMarkStack() : 85 _max_chunk_capacity(0), 86 _base(NULL), 87 _chunk_capacity(0) { 88 set_empty(); 89 } 90 91 bool G1CMMarkStack::resize(size_t new_capacity) { 92 assert(is_empty(), "Only resize when stack is empty."); 93 assert(new_capacity <= _max_chunk_capacity, 94 "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity); 95 96 TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC); 97 98 if (new_base == NULL) { 99 log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk)); 100 return false; 101 } 102 // Release old mapping. 103 if (_base != NULL) { 104 MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity); 105 } 106 107 _base = new_base; 108 _chunk_capacity = new_capacity; 109 set_empty(); 110 111 return true; 112 } 113 114 size_t G1CMMarkStack::capacity_alignment() { 115 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry); 116 } 117 118 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) { 119 guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized."); 120 121 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry); 122 123 _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; 124 size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; 125 126 guarantee(initial_chunk_capacity <= _max_chunk_capacity, 127 "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT, 128 _max_chunk_capacity, 129 initial_chunk_capacity); 130 131 log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT, 132 initial_chunk_capacity, _max_chunk_capacity); 133 134 return resize(initial_chunk_capacity); 135 } 136 137 void G1CMMarkStack::expand() { 138 if (_chunk_capacity == _max_chunk_capacity) { 139 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity); 140 return; 141 } 142 size_t old_capacity = _chunk_capacity; 143 // Double capacity if possible 144 size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity); 145 146 if (resize(new_capacity)) { 147 log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 148 old_capacity, new_capacity); 149 } else { 150 log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 151 old_capacity, new_capacity); 152 } 153 } 154 155 G1CMMarkStack::~G1CMMarkStack() { 156 if (_base != NULL) { 157 MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity); 158 } 159 } 160 161 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) { 162 elem->next = *list; 163 *list = elem; 164 } 165 166 void 
G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) { 167 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 168 add_chunk_to_list(&_chunk_list, elem); 169 _chunks_in_chunk_list++; 170 } 171 172 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) { 173 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 174 add_chunk_to_list(&_free_list, elem); 175 } 176 177 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) { 178 TaskQueueEntryChunk* result = *list; 179 if (result != NULL) { 180 *list = (*list)->next; 181 } 182 return result; 183 } 184 185 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() { 186 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 187 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list); 188 if (result != NULL) { 189 _chunks_in_chunk_list--; 190 } 191 return result; 192 } 193 194 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() { 195 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 196 return remove_chunk_from_list(&_free_list); 197 } 198 199 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() { 200 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code. 201 // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding 202 // wraparound of _hwm. 203 if (_hwm >= _chunk_capacity) { 204 return NULL; 205 } 206 207 size_t cur_idx = Atomic::add(1u, &_hwm) - 1; 208 if (cur_idx >= _chunk_capacity) { 209 return NULL; 210 } 211 212 TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk; 213 result->next = NULL; 214 return result; 215 } 216 217 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) { 218 // Get a new chunk. 219 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list(); 220 221 if (new_chunk == NULL) { 222 // Did not get a chunk from the free list. Allocate from backing memory. 223 new_chunk = allocate_new_chunk(); 224 225 if (new_chunk == NULL) { 226 return false; 227 } 228 } 229 230 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry)); 231 232 add_chunk_to_chunk_list(new_chunk); 233 234 return true; 235 } 236 237 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) { 238 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list(); 239 240 if (cur == NULL) { 241 return false; 242 } 243 244 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry)); 245 246 add_chunk_to_free_list(cur); 247 return true; 248 } 249 250 void G1CMMarkStack::set_empty() { 251 _chunks_in_chunk_list = 0; 252 _hwm = 0; 253 _chunk_list = NULL; 254 _free_list = NULL; 255 } 256 257 G1CMRootRegions::G1CMRootRegions() : 258 _survivors(NULL), _cm(NULL), _scan_in_progress(false), 259 _should_abort(false), _claimed_survivor_index(0) { } 260 261 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) { 262 _survivors = survivors; 263 _cm = cm; 264 } 265 266 void G1CMRootRegions::prepare_for_scan() { 267 assert(!scan_in_progress(), "pre-condition"); 268 269 // Currently, only survivors can be root regions. 
270 _claimed_survivor_index = 0; 271 _scan_in_progress = _survivors->regions()->is_nonempty(); 272 _should_abort = false; 273 } 274 275 HeapRegion* G1CMRootRegions::claim_next() { 276 if (_should_abort) { 277 // If someone has set the should_abort flag, we return NULL to 278 // force the caller to bail out of their loop. 279 return NULL; 280 } 281 282 // Currently, only survivors can be root regions. 283 const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions(); 284 285 int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1; 286 if (claimed_index < survivor_regions->length()) { 287 return survivor_regions->at(claimed_index); 288 } 289 return NULL; 290 } 291 292 uint G1CMRootRegions::num_root_regions() const { 293 return (uint)_survivors->regions()->length(); 294 } 295 296 void G1CMRootRegions::notify_scan_done() { 297 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 298 _scan_in_progress = false; 299 RootRegionScan_lock->notify_all(); 300 } 301 302 void G1CMRootRegions::cancel_scan() { 303 notify_scan_done(); 304 } 305 306 void G1CMRootRegions::scan_finished() { 307 assert(scan_in_progress(), "pre-condition"); 308 309 // Currently, only survivors can be root regions. 310 if (!_should_abort) { 311 assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index); 312 assert((uint)_claimed_survivor_index >= _survivors->length(), 313 "we should have claimed all survivors, claimed index = %u, length = %u", 314 (uint)_claimed_survivor_index, _survivors->length()); 315 } 316 317 notify_scan_done(); 318 } 319 320 bool G1CMRootRegions::wait_until_scan_finished() { 321 if (!scan_in_progress()) { 322 return false; 323 } 324 325 { 326 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 327 while (scan_in_progress()) { 328 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 329 } 330 } 331 return true; 332 } 333 334 // Returns the maximum number of workers to be used in a concurrent 335 // phase based on the number of GC workers being used in a STW 336 // phase. 
337 static uint scale_concurrent_worker_threads(uint num_gc_workers) { 338 return MAX2((num_gc_workers + 2) / 4, 1U); 339 } 340 341 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, 342 G1RegionToSpaceMapper* prev_bitmap_storage, 343 G1RegionToSpaceMapper* next_bitmap_storage) : 344 // _cm_thread set inside the constructor 345 _g1h(g1h), 346 _completed_initialization(false), 347 348 _mark_bitmap_1(), 349 _mark_bitmap_2(), 350 _prev_mark_bitmap(&_mark_bitmap_1), 351 _next_mark_bitmap(&_mark_bitmap_2), 352 353 _heap(_g1h->reserved_region()), 354 355 _root_regions(), 356 357 _global_mark_stack(), 358 359 // _finger set in set_non_marking_state 360 361 _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads), 362 _max_num_tasks(ParallelGCThreads), 363 // _num_active_tasks set in set_non_marking_state() 364 // _tasks set inside the constructor 365 366 _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)), 367 _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)), 368 369 _first_overflow_barrier_sync(), 370 _second_overflow_barrier_sync(), 371 372 _has_overflown(false), 373 _concurrent(false), 374 _has_aborted(false), 375 _restart_for_overflow(false), 376 _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()), 377 _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()), 378 379 // _verbose_level set below 380 381 _init_times(), 382 _remark_times(), 383 _remark_mark_times(), 384 _remark_weak_ref_times(), 385 _cleanup_times(), 386 _total_cleanup_time(0.0), 387 388 _accum_task_vtime(NULL), 389 390 _concurrent_workers(NULL), 391 _num_concurrent_workers(0), 392 _max_concurrent_workers(0), 393 394 _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)), 395 _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC)) 396 { 397 _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage); 398 _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage); 399 400 // Create & start ConcurrentMark thread. 401 _cm_thread = new G1ConcurrentMarkThread(this); 402 if (_cm_thread->osthread() == NULL) { 403 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread"); 404 } 405 406 assert(CGC_lock != NULL, "CGC_lock must be initialized"); 407 408 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 409 satb_qs.set_buffer_size(G1SATBBufferSize); 410 411 _root_regions.init(_g1h->survivor(), this); 412 413 if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) { 414 // Calculate the number of concurrent worker threads by scaling 415 // the number of parallel GC threads. 
416 uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads); 417 FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num); 418 } 419 420 assert(ConcGCThreads > 0, "ConcGCThreads have been set."); 421 if (ConcGCThreads > ParallelGCThreads) { 422 log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).", 423 ConcGCThreads, ParallelGCThreads); 424 return; 425 } 426 427 log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset); 428 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads); 429 430 _num_concurrent_workers = ConcGCThreads; 431 _max_concurrent_workers = _num_concurrent_workers; 432 433 _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true); 434 _concurrent_workers->initialize_workers(); 435 436 if (FLAG_IS_DEFAULT(MarkStackSize)) { 437 size_t mark_stack_size = 438 MIN2(MarkStackSizeMax, 439 MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE))); 440 // Verify that the calculated value for MarkStackSize is in range. 441 // It would be nice to use the private utility routine from Arguments. 442 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { 443 log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): " 444 "must be between 1 and " SIZE_FORMAT, 445 mark_stack_size, MarkStackSizeMax); 446 return; 447 } 448 FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size); 449 } else { 450 // Verify MarkStackSize is in range. 451 if (FLAG_IS_CMDLINE(MarkStackSize)) { 452 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { 453 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 454 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): " 455 "must be between 1 and " SIZE_FORMAT, 456 MarkStackSize, MarkStackSizeMax); 457 return; 458 } 459 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) { 460 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 461 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")" 462 " or for MarkStackSizeMax (" SIZE_FORMAT ")", 463 MarkStackSize, MarkStackSizeMax); 464 return; 465 } 466 } 467 } 468 } 469 470 if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) { 471 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack."); 472 } 473 474 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC); 475 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC); 476 477 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 478 _num_active_tasks = _max_num_tasks; 479 480 for (uint i = 0; i < _max_num_tasks; ++i) { 481 G1CMTaskQueue* task_queue = new G1CMTaskQueue(); 482 task_queue->initialize(); 483 _task_queues->register_queue(i, task_queue); 484 485 _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions()); 486 487 _accum_task_vtime[i] = 0.0; 488 } 489 490 reset_at_marking_complete(); 491 _completed_initialization = true; 492 } 493 494 void G1ConcurrentMark::reset() { 495 _has_aborted = false; 496 497 reset_marking_for_restart(); 498 499 // Reset all tasks, since different phases will use different number of active 500 // threads. So, it's easiest to have all of them ready. 
501 for (uint i = 0; i < _max_num_tasks; ++i) { 502 _tasks[i]->reset(_next_mark_bitmap); 503 } 504 505 uint max_regions = _g1h->max_regions(); 506 for (uint i = 0; i < max_regions; i++) { 507 _top_at_rebuild_starts[i] = NULL; 508 _region_mark_stats[i].clear(); 509 } 510 } 511 512 void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) { 513 for (uint j = 0; j < _max_num_tasks; ++j) { 514 _tasks[j]->clear_mark_stats_cache(region_idx); 515 } 516 _top_at_rebuild_starts[region_idx] = NULL; 517 _region_mark_stats[region_idx].clear(); 518 } 519 520 void G1ConcurrentMark::clear_statistics(HeapRegion* r) { 521 uint const region_idx = r->hrm_index(); 522 if (r->is_humongous()) { 523 assert(r->is_starts_humongous(), "Got humongous continues region here"); 524 uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size()); 525 for (uint j = region_idx; j < (region_idx + size_in_regions); j++) { 526 clear_statistics_in_region(j); 527 } 528 } else { 529 clear_statistics_in_region(region_idx); 530 } 531 } 532 533 static void clear_mark_if_set(G1CMBitMap* bitmap, HeapWord* addr) { 534 if (bitmap->is_marked(addr)) { 535 bitmap->clear(addr); 536 } 537 } 538 539 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) { 540 assert_at_safepoint_on_vm_thread(); 541 542 // Need to clear all mark bits of the humongous object. 543 clear_mark_if_set(_prev_mark_bitmap, r->bottom()); 544 clear_mark_if_set(_next_mark_bitmap, r->bottom()); 545 546 if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) { 547 return; 548 } 549 550 // Clear any statistics about the region gathered so far. 551 clear_statistics(r); 552 } 553 554 void G1ConcurrentMark::reset_marking_for_restart() { 555 _global_mark_stack.set_empty(); 556 557 // Expand the marking stack, if we have to and if we can. 558 if (has_overflown()) { 559 _global_mark_stack.expand(); 560 561 uint max_regions = _g1h->max_regions(); 562 for (uint i = 0; i < max_regions; i++) { 563 _region_mark_stats[i].clear_during_overflow(); 564 } 565 } 566 567 clear_has_overflown(); 568 _finger = _heap.start(); 569 570 for (uint i = 0; i < _max_num_tasks; ++i) { 571 G1CMTaskQueue* queue = _task_queues->queue(i); 572 queue->set_empty(); 573 } 574 } 575 576 void G1ConcurrentMark::set_concurrency(uint active_tasks) { 577 assert(active_tasks <= _max_num_tasks, "we should not have more"); 578 579 _num_active_tasks = active_tasks; 580 // Need to update the three data structures below according to the 581 // number of active threads for this phase. 582 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 583 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 584 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 585 } 586 587 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) { 588 set_concurrency(active_tasks); 589 590 _concurrent = concurrent; 591 592 if (!concurrent) { 593 // At this point we should be in a STW phase, and completed marking. 594 assert_at_safepoint_on_vm_thread(); 595 assert(out_of_regions(), 596 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT, 597 p2i(_finger), p2i(_heap.end())); 598 } 599 } 600 601 void G1ConcurrentMark::reset_at_marking_complete() { 602 // We set the global marking state to some default values when we're 603 // not doing marking. 
604 reset_marking_for_restart(); 605 _num_active_tasks = 0; 606 } 607 608 G1ConcurrentMark::~G1ConcurrentMark() { 609 FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts); 610 FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats); 611 // The G1ConcurrentMark instance is never freed. 612 ShouldNotReachHere(); 613 } 614 615 class G1ClearBitMapTask : public AbstractGangTask { 616 public: 617 static size_t chunk_size() { return M; } 618 619 private: 620 // Heap region closure used for clearing the given mark bitmap. 621 class G1ClearBitmapHRClosure : public HeapRegionClosure { 622 private: 623 G1CMBitMap* _bitmap; 624 G1ConcurrentMark* _cm; 625 public: 626 G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) { 627 } 628 629 virtual bool do_heap_region(HeapRegion* r) { 630 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize; 631 632 HeapWord* cur = r->bottom(); 633 HeapWord* const end = r->end(); 634 635 while (cur < end) { 636 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end)); 637 _bitmap->clear_range(mr); 638 639 cur += chunk_size_in_words; 640 641 // Abort iteration if after yielding the marking has been aborted. 642 if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) { 643 return true; 644 } 645 // Repeat the asserts from before the start of the closure. We will do them 646 // as asserts here to minimize their overhead on the product. However, we 647 // will have them as guarantees at the beginning / end of the bitmap 648 // clearing to get some checking in the product. 649 assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant"); 650 assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant"); 651 } 652 assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index()); 653 654 return false; 655 } 656 }; 657 658 G1ClearBitmapHRClosure _cl; 659 HeapRegionClaimer _hr_claimer; 660 bool _suspendible; // If the task is suspendible, workers must join the STS. 661 662 public: 663 G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) : 664 AbstractGangTask("G1 Clear Bitmap"), 665 _cl(bitmap, suspendible ? 
cm : NULL), 666 _hr_claimer(n_workers), 667 _suspendible(suspendible) 668 { } 669 670 void work(uint worker_id) { 671 SuspendibleThreadSetJoiner sts_join(_suspendible); 672 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id); 673 } 674 675 bool is_complete() { 676 return _cl.is_complete(); 677 } 678 }; 679 680 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) { 681 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint."); 682 683 size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor(); 684 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size(); 685 686 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers()); 687 688 G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield); 689 690 log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks); 691 workers->run_task(&cl, num_workers); 692 guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding."); 693 } 694 695 void G1ConcurrentMark::cleanup_for_next_mark() { 696 // Make sure that the concurrent mark thread looks to still be in 697 // the current cycle. 698 guarantee(cm_thread()->during_cycle(), "invariant"); 699 700 // We are finishing up the current cycle by clearing the next 701 // marking bitmap and getting it ready for the next cycle. During 702 // this time no other cycle can start. So, let's make sure that this 703 // is the case. 704 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant"); 705 706 clear_bitmap(_next_mark_bitmap, _concurrent_workers, true); 707 708 // Repeat the asserts from above. 709 guarantee(cm_thread()->during_cycle(), "invariant"); 710 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant"); 711 } 712 713 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) { 714 assert_at_safepoint_on_vm_thread(); 715 clear_bitmap(_prev_mark_bitmap, workers, false); 716 } 717 718 class CheckBitmapClearHRClosure : public HeapRegionClosure { 719 G1CMBitMap* _bitmap; 720 public: 721 CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) { 722 } 723 724 virtual bool do_heap_region(HeapRegion* r) { 725 // This closure can be called concurrently to the mutator, so we must make sure 726 // that the result of the getNextMarkedWordAddress() call is compared to the 727 // value passed to it as limit to detect any found bits. 728 // end never changes in G1. 729 HeapWord* end = r->end(); 730 return _bitmap->get_next_marked_addr(r->bottom(), end) != end; 731 } 732 }; 733 734 bool G1ConcurrentMark::next_mark_bitmap_is_clear() { 735 CheckBitmapClearHRClosure cl(_next_mark_bitmap); 736 _g1h->heap_region_iterate(&cl); 737 return cl.is_complete(); 738 } 739 740 class NoteStartOfMarkHRClosure : public HeapRegionClosure { 741 public: 742 bool do_heap_region(HeapRegion* r) { 743 r->note_start_of_marking(); 744 return false; 745 } 746 }; 747 748 void G1ConcurrentMark::pre_initial_mark() { 749 // Initialize marking structures. This has to be done in a STW phase. 750 reset(); 751 752 // For each region note start of marking. 
753 NoteStartOfMarkHRClosure startcl; 754 _g1h->heap_region_iterate(&startcl); 755 } 756 757 758 void G1ConcurrentMark::post_initial_mark() { 759 // Start Concurrent Marking weak-reference discovery. 760 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 761 // enable ("weak") refs discovery 762 rp->enable_discovery(); 763 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 764 765 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 766 // This is the start of the marking cycle, we're expected all 767 // threads to have SATB queues with active set to false. 768 satb_mq_set.set_active_all_threads(true, /* new active value */ 769 false /* expected_active */); 770 771 _root_regions.prepare_for_scan(); 772 773 // update_g1_committed() will be called at the end of an evac pause 774 // when marking is on. So, it's also called at the end of the 775 // initial-mark pause to update the heap end, if the heap expands 776 // during it. No need to call it here. 777 } 778 779 /* 780 * Notice that in the next two methods, we actually leave the STS 781 * during the barrier sync and join it immediately afterwards. If we 782 * do not do this, the following deadlock can occur: one thread could 783 * be in the barrier sync code, waiting for the other thread to also 784 * sync up, whereas another one could be trying to yield, while also 785 * waiting for the other threads to sync up too. 786 * 787 * Note, however, that this code is also used during remark and in 788 * this case we should not attempt to leave / enter the STS, otherwise 789 * we'll either hit an assert (debug / fastdebug) or deadlock 790 * (product). So we should only leave / enter the STS if we are 791 * operating concurrently. 792 * 793 * Because the thread that does the sync barrier has left the STS, it 794 * is possible to be suspended for a Full GC or an evacuation pause 795 * could occur. This is actually safe, since the entering the sync 796 * barrier is one of the last things do_marking_step() does, and it 797 * doesn't manipulate any data structures afterwards. 798 */ 799 800 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 801 bool barrier_aborted; 802 { 803 SuspendibleThreadSetLeaver sts_leave(concurrent()); 804 barrier_aborted = !_first_overflow_barrier_sync.enter(); 805 } 806 807 // at this point everyone should have synced up and not be doing any 808 // more work 809 810 if (barrier_aborted) { 811 // If the barrier aborted we ignore the overflow condition and 812 // just abort the whole marking phase as quickly as possible. 
813 return; 814 } 815 } 816 817 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) { 818 SuspendibleThreadSetLeaver sts_leave(concurrent()); 819 _second_overflow_barrier_sync.enter(); 820 821 // at this point everything should be re-initialized and ready to go 822 } 823 824 class G1CMConcurrentMarkingTask : public AbstractGangTask { 825 G1ConcurrentMark* _cm; 826 827 public: 828 void work(uint worker_id) { 829 assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread"); 830 ResourceMark rm; 831 832 double start_vtime = os::elapsedVTime(); 833 834 { 835 SuspendibleThreadSetJoiner sts_join; 836 837 assert(worker_id < _cm->active_tasks(), "invariant"); 838 839 G1CMTask* task = _cm->task(worker_id); 840 task->record_start_time(); 841 if (!_cm->has_aborted()) { 842 do { 843 task->do_marking_step(G1ConcMarkStepDurationMillis, 844 true /* do_termination */, 845 false /* is_serial*/); 846 847 _cm->do_yield_check(); 848 } while (!_cm->has_aborted() && task->has_aborted()); 849 } 850 task->record_end_time(); 851 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant"); 852 } 853 854 double end_vtime = os::elapsedVTime(); 855 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime); 856 } 857 858 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) : 859 AbstractGangTask("Concurrent Mark"), _cm(cm) { } 860 861 ~G1CMConcurrentMarkingTask() { } 862 }; 863 864 uint G1ConcurrentMark::calc_active_marking_workers() { 865 uint result = 0; 866 if (!UseDynamicNumberOfGCThreads || 867 (!FLAG_IS_DEFAULT(ConcGCThreads) && 868 !ForceDynamicNumberOfGCThreads)) { 869 result = _max_concurrent_workers; 870 } else { 871 result = 872 AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers, 873 1, /* Minimum workers */ 874 _num_concurrent_workers, 875 Threads::number_of_non_daemon_threads()); 876 // Don't scale the result down by scale_concurrent_workers() because 877 // that scaling has already gone into "_max_concurrent_workers". 878 } 879 assert(result > 0 && result <= _max_concurrent_workers, 880 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u", 881 _max_concurrent_workers, result); 882 return result; 883 } 884 885 void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) { 886 // Currently, only survivors can be root regions. 
887 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 888 G1RootRegionScanClosure cl(_g1h, this, worker_id); 889 890 const uintx interval = PrefetchScanIntervalInBytes; 891 HeapWord* curr = hr->bottom(); 892 const HeapWord* end = hr->top(); 893 while (curr < end) { 894 Prefetch::read(curr, interval); 895 oop obj = oop(curr); 896 int size = obj->oop_iterate_size(&cl); 897 assert(size == obj->size(), "sanity"); 898 curr += size; 899 } 900 } 901 902 class G1CMRootRegionScanTask : public AbstractGangTask { 903 G1ConcurrentMark* _cm; 904 public: 905 G1CMRootRegionScanTask(G1ConcurrentMark* cm) : 906 AbstractGangTask("G1 Root Region Scan"), _cm(cm) { } 907 908 void work(uint worker_id) { 909 assert(Thread::current()->is_ConcurrentGC_thread(), 910 "this should only be done by a conc GC thread"); 911 912 G1CMRootRegions* root_regions = _cm->root_regions(); 913 HeapRegion* hr = root_regions->claim_next(); 914 while (hr != NULL) { 915 _cm->scan_root_region(hr, worker_id); 916 hr = root_regions->claim_next(); 917 } 918 } 919 }; 920 921 void G1ConcurrentMark::scan_root_regions() { 922 // scan_in_progress() will have been set to true only if there was 923 // at least one root region to scan. So, if it's false, we 924 // should not attempt to do any further work. 925 if (root_regions()->scan_in_progress()) { 926 assert(!has_aborted(), "Aborting before root region scanning is finished not supported."); 927 928 _num_concurrent_workers = MIN2(calc_active_marking_workers(), 929 // We distribute work on a per-region basis, so starting 930 // more threads than that is useless. 931 root_regions()->num_root_regions()); 932 assert(_num_concurrent_workers <= _max_concurrent_workers, 933 "Maximum number of marking threads exceeded"); 934 935 G1CMRootRegionScanTask task(this); 936 log_debug(gc, ergo)("Running %s using %u workers for %u work units.", 937 task.name(), _num_concurrent_workers, root_regions()->num_root_regions()); 938 _concurrent_workers->run_task(&task, _num_concurrent_workers); 939 940 // It's possible that has_aborted() is true here without actually 941 // aborting the survivor scan earlier. This is OK as it's 942 // mainly used for sanity checking. 943 root_regions()->scan_finished(); 944 } 945 } 946 947 void G1ConcurrentMark::concurrent_cycle_start() { 948 _gc_timer_cm->register_gc_start(); 949 950 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start()); 951 952 _g1h->trace_heap_before_gc(_gc_tracer_cm); 953 } 954 955 void G1ConcurrentMark::concurrent_cycle_end() { 956 _g1h->collector_state()->set_clearing_next_bitmap(false); 957 958 _g1h->trace_heap_after_gc(_gc_tracer_cm); 959 960 if (has_aborted()) { 961 log_info(gc, marking)("Concurrent Mark Abort"); 962 _gc_tracer_cm->report_concurrent_mode_failure(); 963 } 964 965 _gc_timer_cm->register_gc_end(); 966 967 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions()); 968 } 969 970 void G1ConcurrentMark::mark_from_roots() { 971 _restart_for_overflow = false; 972 973 _num_concurrent_workers = calc_active_marking_workers(); 974 975 uint active_workers = MAX2(1U, _num_concurrent_workers); 976 977 // Setting active workers is not guaranteed since fewer 978 // worker threads may currently exist and more may not be 979 // available. 
980 active_workers = _concurrent_workers->update_active_workers(active_workers); 981 log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers()); 982 983 // Parallel task terminator is set in "set_concurrency_and_phase()" 984 set_concurrency_and_phase(active_workers, true /* concurrent */); 985 986 G1CMConcurrentMarkingTask marking_task(this); 987 _concurrent_workers->run_task(&marking_task); 988 print_stats(); 989 } 990 991 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) { 992 G1HeapVerifier* verifier = _g1h->verifier(); 993 994 verifier->verify_region_sets_optional(); 995 996 if (VerifyDuringGC) { 997 GCTraceTime(Debug, gc, phases) trace(caller, _gc_timer_cm); 998 999 size_t const BufLen = 512; 1000 char buffer[BufLen]; 1001 1002 jio_snprintf(buffer, BufLen, "During GC (%s)", caller); 1003 verifier->verify(type, vo, buffer); 1004 } 1005 1006 verifier->check_bitmaps(caller); 1007 } 1008 1009 class G1UpdateRemSetTrackingBeforeRebuildTask : public AbstractGangTask { 1010 G1CollectedHeap* _g1h; 1011 G1ConcurrentMark* _cm; 1012 HeapRegionClaimer _hrclaimer; 1013 uint _total_selected_for_rebuild; 1014 1015 G1PrintRegionLivenessInfoClosure _cl; 1016 1017 class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure { 1018 G1CollectedHeap* _g1h; 1019 G1ConcurrentMark* _cm; 1020 1021 G1PrintRegionLivenessInfoClosure* _cl; 1022 1023 uint _num_regions_selected_for_rebuild; // The number of regions actually selected for rebuild. 1024 1025 void update_remset_before_rebuild(HeapRegion * hr) { 1026 G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker(); 1027 1028 size_t const live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize; 1029 bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes); 1030 if (selected_for_rebuild) { 1031 _num_regions_selected_for_rebuild++; 1032 } 1033 _cm->update_top_at_rebuild_start(hr); 1034 } 1035 1036 // Distribute the given words across the humongous object starting with hr and 1037 // note end of marking. 1038 void distribute_marked_bytes(HeapRegion* hr, size_t marked_words) { 1039 uint const region_idx = hr->hrm_index(); 1040 size_t const obj_size_in_words = (size_t)oop(hr->bottom())->size(); 1041 uint const num_regions_in_humongous = (uint)G1CollectedHeap::humongous_obj_size_in_regions(obj_size_in_words); 1042 1043 // "Distributing" zero words means that we only note end of marking for these 1044 // regions. 
1045 assert(marked_words == 0 || obj_size_in_words == marked_words, 1046 "Marked words should either be 0 or the same as humongous object (" SIZE_FORMAT ") but is " SIZE_FORMAT, 1047 obj_size_in_words, marked_words); 1048 1049 for (uint i = region_idx; i < (region_idx + num_regions_in_humongous); i++) { 1050 HeapRegion* const r = _g1h->region_at(i); 1051 size_t const words_to_add = MIN2(HeapRegion::GrainWords, marked_words); 1052 1053 log_trace(gc, marking)("Adding " SIZE_FORMAT " words to humongous region %u (%s)", 1054 words_to_add, i, r->get_type_str()); 1055 add_marked_bytes_and_note_end(r, words_to_add * HeapWordSize); 1056 marked_words -= words_to_add; 1057 } 1058 assert(marked_words == 0, 1059 SIZE_FORMAT " words left after distributing space across %u regions", 1060 marked_words, num_regions_in_humongous); 1061 } 1062 1063 void update_marked_bytes(HeapRegion* hr) { 1064 uint const region_idx = hr->hrm_index(); 1065 size_t const marked_words = _cm->liveness(region_idx); 1066 // The marking attributes the object's size completely to the humongous starts 1067 // region. We need to distribute this value across the entire set of regions a 1068 // humongous object spans. 1069 if (hr->is_humongous()) { 1070 assert(hr->is_starts_humongous() || marked_words == 0, 1071 "Should not have marked words " SIZE_FORMAT " in non-starts humongous region %u (%s)", 1072 marked_words, region_idx, hr->get_type_str()); 1073 if (hr->is_starts_humongous()) { 1074 distribute_marked_bytes(hr, marked_words); 1075 } 1076 } else { 1077 log_trace(gc, marking)("Adding " SIZE_FORMAT " words to region %u (%s)", marked_words, region_idx, hr->get_type_str()); 1078 add_marked_bytes_and_note_end(hr, marked_words * HeapWordSize); 1079 } 1080 } 1081 1082 void add_marked_bytes_and_note_end(HeapRegion* hr, size_t marked_bytes) { 1083 hr->add_to_marked_bytes(marked_bytes); 1084 _cl->do_heap_region(hr); 1085 hr->note_end_of_marking(); 1086 } 1087 1088 public: 1089 G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm, G1PrintRegionLivenessInfoClosure* cl) : 1090 _g1h(g1h), _cm(cm), _num_regions_selected_for_rebuild(0), _cl(cl) { } 1091 1092 virtual bool do_heap_region(HeapRegion* r) { 1093 update_remset_before_rebuild(r); 1094 update_marked_bytes(r); 1095 1096 return false; 1097 } 1098 1099 uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; } 1100 }; 1101 1102 public: 1103 G1UpdateRemSetTrackingBeforeRebuildTask(G1CollectedHeap* g1h, G1ConcurrentMark* cm, uint num_workers) : 1104 AbstractGangTask("G1 Update RemSet Tracking Before Rebuild"), 1105 _g1h(g1h), _cm(cm), _hrclaimer(num_workers), _total_selected_for_rebuild(0), _cl("Post-Marking") { } 1106 1107 virtual void work(uint worker_id) { 1108 G1UpdateRemSetTrackingBeforeRebuild update_cl(_g1h, _cm, &_cl); 1109 _g1h->heap_region_par_iterate_from_worker_offset(&update_cl, &_hrclaimer, worker_id); 1110 Atomic::add(update_cl.num_selected_for_rebuild(), &_total_selected_for_rebuild); 1111 } 1112 1113 uint total_selected_for_rebuild() const { return _total_selected_for_rebuild; } 1114 1115 // Number of regions for which roughly one thread should be spawned for this work. 
1116 static const uint RegionsPerThread = 384; 1117 }; 1118 1119 class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure { 1120 G1CollectedHeap* _g1h; 1121 public: 1122 G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { } 1123 1124 virtual bool do_heap_region(HeapRegion* r) { 1125 _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r); 1126 return false; 1127 } 1128 }; 1129 1130 void G1ConcurrentMark::remark() { 1131 assert_at_safepoint_on_vm_thread(); 1132 1133 // If a full collection has happened, we should not continue. However we might 1134 // have ended up here as the Remark VM operation has been scheduled already. 1135 if (has_aborted()) { 1136 return; 1137 } 1138 1139 G1Policy* g1p = _g1h->g1_policy(); 1140 g1p->record_concurrent_mark_remark_start(); 1141 1142 double start = os::elapsedTime(); 1143 1144 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before"); 1145 1146 { 1147 GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm); 1148 finalize_marking(); 1149 } 1150 1151 double mark_work_end = os::elapsedTime(); 1152 1153 bool const mark_finished = !has_overflown(); 1154 if (mark_finished) { 1155 weak_refs_work(false /* clear_all_soft_refs */); 1156 1157 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1158 // We're done with marking. 1159 // This is the end of the marking cycle, we're expected all 1160 // threads to have SATB queues with active set to true. 1161 satb_mq_set.set_active_all_threads(false, /* new active value */ 1162 true /* expected_active */); 1163 1164 { 1165 GCTraceTime(Debug, gc, phases) trace("Flush Task Caches", _gc_timer_cm); 1166 flush_all_task_caches(); 1167 } 1168 1169 // Install newly created mark bitmap as "prev". 1170 swap_mark_bitmaps(); 1171 { 1172 GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking Before Rebuild", _gc_timer_cm); 1173 1174 uint const workers_by_capacity = (_g1h->num_regions() + G1UpdateRemSetTrackingBeforeRebuildTask::RegionsPerThread - 1) / 1175 G1UpdateRemSetTrackingBeforeRebuildTask::RegionsPerThread; 1176 uint const num_workers = MIN2(_g1h->workers()->active_workers(), workers_by_capacity); 1177 1178 G1UpdateRemSetTrackingBeforeRebuildTask cl(_g1h, this, num_workers); 1179 log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_regions()); 1180 _g1h->workers()->run_task(&cl, num_workers); 1181 1182 log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u", 1183 _g1h->num_regions(), cl.total_selected_for_rebuild()); 1184 } 1185 { 1186 GCTraceTime(Debug, gc, phases) trace("Reclaim Empty Regions", _gc_timer_cm); 1187 reclaim_empty_regions(); 1188 } 1189 1190 // Clean out dead classes 1191 if (ClassUnloadingWithConcurrentMark) { 1192 GCTraceTime(Debug, gc, phases) trace("Purge Metaspace", _gc_timer_cm); 1193 ClassLoaderDataGraph::purge(); 1194 } 1195 1196 compute_new_sizes(); 1197 1198 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark after"); 1199 1200 assert(!restart_for_overflow(), "sanity"); 1201 // Completely reset the marking state since marking completed 1202 reset_at_marking_complete(); 1203 } else { 1204 // We overflowed. Restart concurrent marking. 
1205 _restart_for_overflow = true; 1206 1207 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow"); 1208 1209 // Clear the marking state because we will be restarting 1210 // marking due to overflowing the global mark stack. 1211 reset_marking_for_restart(); 1212 } 1213 1214 { 1215 GCTraceTime(Debug, gc, phases) trace("Report Object Count", _gc_timer_cm); 1216 report_object_count(mark_finished); 1217 } 1218 1219 // Statistics 1220 double now = os::elapsedTime(); 1221 _remark_mark_times.add((mark_work_end - start) * 1000.0); 1222 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1223 _remark_times.add((now - start) * 1000.0); 1224 1225 g1p->record_concurrent_mark_remark_end(); 1226 } 1227 1228 class G1ReclaimEmptyRegionsTask : public AbstractGangTask { 1229 // Per-region work during the Cleanup pause. 1230 class G1ReclaimEmptyRegionsClosure : public HeapRegionClosure { 1231 G1CollectedHeap* _g1h; 1232 size_t _freed_bytes; 1233 FreeRegionList* _local_cleanup_list; 1234 uint _old_regions_removed; 1235 uint _humongous_regions_removed; 1236 HRRSCleanupTask* _hrrs_cleanup_task; 1237 1238 public: 1239 G1ReclaimEmptyRegionsClosure(G1CollectedHeap* g1h, 1240 FreeRegionList* local_cleanup_list, 1241 HRRSCleanupTask* hrrs_cleanup_task) : 1242 _g1h(g1h), 1243 _freed_bytes(0), 1244 _local_cleanup_list(local_cleanup_list), 1245 _old_regions_removed(0), 1246 _humongous_regions_removed(0), 1247 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1248 1249 size_t freed_bytes() { return _freed_bytes; } 1250 const uint old_regions_removed() { return _old_regions_removed; } 1251 const uint humongous_regions_removed() { return _humongous_regions_removed; } 1252 1253 bool do_heap_region(HeapRegion *hr) { 1254 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) { 1255 _freed_bytes += hr->used(); 1256 hr->set_containing_set(NULL); 1257 if (hr->is_humongous()) { 1258 _humongous_regions_removed++; 1259 _g1h->free_humongous_region(hr, _local_cleanup_list); 1260 } else { 1261 _old_regions_removed++; 1262 _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */); 1263 } 1264 hr->clear_cardtable(); 1265 _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index()); 1266 log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom())); 1267 } else { 1268 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1269 } 1270 1271 return false; 1272 } 1273 }; 1274 1275 G1CollectedHeap* _g1h; 1276 FreeRegionList* _cleanup_list; 1277 HeapRegionClaimer _hrclaimer; 1278 1279 public: 1280 G1ReclaimEmptyRegionsTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1281 AbstractGangTask("G1 Cleanup"), 1282 _g1h(g1h), 1283 _cleanup_list(cleanup_list), 1284 _hrclaimer(n_workers) { 1285 1286 HeapRegionRemSet::reset_for_cleanup_tasks(); 1287 } 1288 1289 void work(uint worker_id) { 1290 FreeRegionList local_cleanup_list("Local Cleanup List"); 1291 HRRSCleanupTask hrrs_cleanup_task; 1292 G1ReclaimEmptyRegionsClosure cl(_g1h, 1293 &local_cleanup_list, 1294 &hrrs_cleanup_task); 1295 _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id); 1296 assert(cl.is_complete(), "Shouldn't have aborted!"); 1297 1298 // Now update the old/humongous region sets 1299 _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed()); 1300 { 1301 MutexLockerEx x(ParGCRareEvent_lock, 
Mutex::_no_safepoint_check_flag); 1302 _g1h->decrement_summary_bytes(cl.freed_bytes()); 1303 1304 _cleanup_list->add_ordered(&local_cleanup_list); 1305 assert(local_cleanup_list.is_empty(), "post-condition"); 1306 1307 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1308 } 1309 } 1310 }; 1311 1312 void G1ConcurrentMark::reclaim_empty_regions() { 1313 WorkGang* workers = _g1h->workers(); 1314 FreeRegionList empty_regions_list("Empty Regions After Mark List"); 1315 1316 G1ReclaimEmptyRegionsTask cl(_g1h, &empty_regions_list, workers->active_workers()); 1317 workers->run_task(&cl); 1318 1319 if (!empty_regions_list.is_empty()) { 1320 log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length()); 1321 // Now print the empty regions list. 1322 G1HRPrinter* hrp = _g1h->hr_printer(); 1323 if (hrp->is_active()) { 1324 FreeRegionListIterator iter(&empty_regions_list); 1325 while (iter.more_available()) { 1326 HeapRegion* hr = iter.get_next(); 1327 hrp->cleanup(hr); 1328 } 1329 } 1330 // And actually make them available. 1331 _g1h->prepend_to_freelist(&empty_regions_list); 1332 } 1333 } 1334 1335 void G1ConcurrentMark::compute_new_sizes() { 1336 MetaspaceGC::compute_new_size(); 1337 1338 // Cleanup will have freed any regions completely full of garbage. 1339 // Update the soft reference policy with the new heap occupancy. 1340 Universe::update_heap_info_at_gc(); 1341 1342 // We reclaimed old regions so we should calculate the sizes to make 1343 // sure we update the old gen/space data. 1344 _g1h->g1mm()->update_sizes(); 1345 } 1346 1347 void G1ConcurrentMark::cleanup() { 1348 assert_at_safepoint_on_vm_thread(); 1349 1350 // If a full collection has happened, we shouldn't do this. 1351 if (has_aborted()) { 1352 return; 1353 } 1354 1355 G1Policy* g1p = _g1h->g1_policy(); 1356 g1p->record_concurrent_mark_cleanup_start(); 1357 1358 double start = os::elapsedTime(); 1359 1360 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup before"); 1361 1362 { 1363 GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking After Rebuild", _gc_timer_cm); 1364 G1UpdateRemSetTrackingAfterRebuild cl(_g1h); 1365 _g1h->heap_region_iterate(&cl); 1366 } 1367 1368 if (log_is_enabled(Trace, gc, liveness)) { 1369 G1PrintRegionLivenessInfoClosure cl("Post-Cleanup"); 1370 _g1h->heap_region_iterate(&cl); 1371 } 1372 1373 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after"); 1374 1375 // We need to make this be a "collection" so any collection pause that 1376 // races with it goes around and waits for Cleanup to finish. 1377 _g1h->increment_total_collections(); 1378 1379 // Local statistics 1380 double recent_cleanup_time = (os::elapsedTime() - start); 1381 _total_cleanup_time += recent_cleanup_time; 1382 _cleanup_times.add(recent_cleanup_time); 1383 1384 { 1385 GCTraceTime(Debug, gc, phases) trace("Finalize Concurrent Mark Cleanup", _gc_timer_cm); 1386 _g1h->g1_policy()->record_concurrent_mark_cleanup_end(); 1387 } 1388 } 1389 1390 // Supporting Object and Oop closures for reference discovery 1391 // and processing in during marking 1392 1393 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1394 HeapWord* addr = (HeapWord*)obj; 1395 return addr != NULL && 1396 (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj)); 1397 } 1398 1399 // 'Keep Alive' oop closure used by both serial parallel reference processing. 
1400 // Uses the G1CMTask associated with a worker thread (for serial reference 1401 // processing the G1CMTask for worker 0 is used) to preserve (mark) and 1402 // trace referent objects. 1403 // 1404 // Using the G1CMTask and embedded local queues avoids having the worker 1405 // threads operating on the global mark stack. This reduces the risk 1406 // of overflowing the stack - which we would rather avoid at this late 1407 // state. Also using the tasks' local queues removes the potential 1408 // of the workers interfering with each other that could occur if 1409 // operating on the global stack. 1410 1411 class G1CMKeepAliveAndDrainClosure : public OopClosure { 1412 G1ConcurrentMark* _cm; 1413 G1CMTask* _task; 1414 int _ref_counter_limit; 1415 int _ref_counter; 1416 bool _is_serial; 1417 public: 1418 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) : 1419 _cm(cm), _task(task), _is_serial(is_serial), 1420 _ref_counter_limit(G1RefProcDrainInterval) { 1421 assert(_ref_counter_limit > 0, "sanity"); 1422 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1423 _ref_counter = _ref_counter_limit; 1424 } 1425 1426 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1427 virtual void do_oop( oop* p) { do_oop_work(p); } 1428 1429 template <class T> void do_oop_work(T* p) { 1430 if (_cm->has_overflown()) { 1431 return; 1432 } 1433 if (!_task->deal_with_reference(p)) { 1434 // We did not add anything to the mark bitmap (or mark stack), so there is 1435 // no point trying to drain it. 1436 return; 1437 } 1438 _ref_counter--; 1439 1440 if (_ref_counter == 0) { 1441 // We have dealt with _ref_counter_limit references, pushing them 1442 // and objects reachable from them on to the local stack (and 1443 // possibly the global stack). Call G1CMTask::do_marking_step() to 1444 // process these entries. 1445 // 1446 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if 1447 // there's nothing more to do (i.e. we're done with the entries that 1448 // were pushed as a result of the G1CMTask::deal_with_reference() calls 1449 // above) or we overflow. 1450 // 1451 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted() 1452 // flag while there may still be some work to do. (See the comment at 1453 // the beginning of G1CMTask::do_marking_step() for those conditions - 1454 // one of which is reaching the specified time target.) It is only 1455 // when G1CMTask::do_marking_step() returns without setting the 1456 // has_aborted() flag that the marking step has completed. 1457 do { 1458 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1459 _task->do_marking_step(mark_step_duration_ms, 1460 false /* do_termination */, 1461 _is_serial); 1462 } while (_task->has_aborted() && !_cm->has_overflown()); 1463 _ref_counter = _ref_counter_limit; 1464 } 1465 } 1466 }; 1467 1468 // 'Drain' oop closure used by both serial and parallel reference processing. 1469 // Uses the G1CMTask associated with a given worker thread (for serial 1470 // reference processing the G1CMtask for worker 0 is used). Calls the 1471 // do_marking_step routine, with an unbelievably large timeout value, 1472 // to drain the marking data structures of the remaining entries 1473 // added by the 'keep alive' oop closure above. 
1474 1475 class G1CMDrainMarkingStackClosure : public VoidClosure { 1476 G1ConcurrentMark* _cm; 1477 G1CMTask* _task; 1478 bool _is_serial; 1479 public: 1480 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) : 1481 _cm(cm), _task(task), _is_serial(is_serial) { 1482 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1483 } 1484 1485 void do_void() { 1486 do { 1487 // We call G1CMTask::do_marking_step() to completely drain the local 1488 // and global marking stacks of entries pushed by the 'keep alive' 1489 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 1490 // 1491 // G1CMTask::do_marking_step() is called in a loop, which we'll exit 1492 // if there's nothing more to do (i.e. we've completely drained the 1493 // entries that were pushed as a a result of applying the 'keep alive' 1494 // closure to the entries on the discovered ref lists) or we overflow 1495 // the global marking stack. 1496 // 1497 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted() 1498 // flag while there may still be some work to do. (See the comment at 1499 // the beginning of G1CMTask::do_marking_step() for those conditions - 1500 // one of which is reaching the specified time target.) It is only 1501 // when G1CMTask::do_marking_step() returns without setting the 1502 // has_aborted() flag that the marking step has completed. 1503 1504 _task->do_marking_step(1000000000.0 /* something very large */, 1505 true /* do_termination */, 1506 _is_serial); 1507 } while (_task->has_aborted() && !_cm->has_overflown()); 1508 } 1509 }; 1510 1511 // Implementation of AbstractRefProcTaskExecutor for parallel 1512 // reference processing at the end of G1 concurrent marking 1513 1514 class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor { 1515 private: 1516 G1CollectedHeap* _g1h; 1517 G1ConcurrentMark* _cm; 1518 WorkGang* _workers; 1519 uint _active_workers; 1520 1521 public: 1522 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 1523 G1ConcurrentMark* cm, 1524 WorkGang* workers, 1525 uint n_workers) : 1526 _g1h(g1h), _cm(cm), 1527 _workers(workers), _active_workers(n_workers) { } 1528 1529 // Executes the given task using concurrent marking worker threads. 
1530 virtual void execute(ProcessTask& task); 1531 virtual void execute(EnqueueTask& task); 1532 }; 1533 1534 class G1CMRefProcTaskProxy : public AbstractGangTask { 1535 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1536 ProcessTask& _proc_task; 1537 G1CollectedHeap* _g1h; 1538 G1ConcurrentMark* _cm; 1539 1540 public: 1541 G1CMRefProcTaskProxy(ProcessTask& proc_task, 1542 G1CollectedHeap* g1h, 1543 G1ConcurrentMark* cm) : 1544 AbstractGangTask("Process reference objects in parallel"), 1545 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 1546 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1547 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 1548 } 1549 1550 virtual void work(uint worker_id) { 1551 ResourceMark rm; 1552 HandleMark hm; 1553 G1CMTask* task = _cm->task(worker_id); 1554 G1CMIsAliveClosure g1_is_alive(_g1h); 1555 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 1556 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 1557 1558 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1559 } 1560 }; 1561 1562 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 1563 assert(_workers != NULL, "Need parallel worker threads."); 1564 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1565 1566 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 1567 1568 // We need to reset the concurrency level before each 1569 // proxy task execution, so that the termination protocol 1570 // and overflow handling in G1CMTask::do_marking_step() knows 1571 // how many workers to wait for. 1572 _cm->set_concurrency(_active_workers); 1573 _workers->run_task(&proc_task_proxy); 1574 } 1575 1576 class G1CMRefEnqueueTaskProxy : public AbstractGangTask { 1577 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1578 EnqueueTask& _enq_task; 1579 1580 public: 1581 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 1582 AbstractGangTask("Enqueue reference objects in parallel"), 1583 _enq_task(enq_task) { } 1584 1585 virtual void work(uint worker_id) { 1586 _enq_task.work(worker_id); 1587 } 1588 }; 1589 1590 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 1591 assert(_workers != NULL, "Need parallel worker threads."); 1592 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1593 1594 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 1595 1596 // Not strictly necessary but... 1597 // 1598 // We need to reset the concurrency level before each 1599 // proxy task execution, so that the termination protocol 1600 // and overflow handling in G1CMTask::do_marking_step() knows 1601 // how many workers to wait for. 1602 _cm->set_concurrency(_active_workers); 1603 _workers->run_task(&enq_task_proxy); 1604 } 1605 1606 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) { 1607 ResourceMark rm; 1608 HandleMark hm; 1609 1610 // Is alive closure. 1611 G1CMIsAliveClosure g1_is_alive(_g1h); 1612 1613 // Inner scope to exclude the cleaning of the string and symbol 1614 // tables from the displayed time. 1615 { 1616 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm); 1617 1618 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1619 1620 // See the comment in G1CollectedHeap::ref_processing_init() 1621 // about how reference processing currently works in G1. 
1622
1623 // Set the soft reference policy
1624 rp->setup_policy(clear_all_soft_refs);
1625 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1626
1627 // Instances of the 'Keep Alive' and 'Complete GC' closures used
1628 // in serial reference processing. Note these closures are also
1629 // used for serially processing (by the current thread) the
1630 // JNI references during parallel reference processing.
1631 //
1632 // These closures do not need to synchronize with the worker
1633 // threads involved in parallel reference processing as these
1634 // instances are executed serially by the current thread (e.g.
1635 // reference processing is not multi-threaded and is thus
1636 // performed by the current thread instead of a gang worker).
1637 //
1638 // The gang tasks involved in parallel reference processing create
1639 // their own instances of these closures, which do their own
1640 // synchronization among themselves.
1641 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1642 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1643
1644 // We need at least one active thread. If reference processing
1645 // is not multi-threaded we use the current (VMThread) thread,
1646 // otherwise we use the work gang from the G1CollectedHeap and
1647 // we utilize all the worker threads we can.
1648 bool processing_is_mt = rp->processing_is_mt();
1649 uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
1650 active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
1651
1652 // Parallel processing task executor.
1653 G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
1654 _g1h->workers(), active_workers);
1655 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1656
1657 // Set the concurrency level. The phase was already set prior to
1658 // executing the remark task.
1659 set_concurrency(active_workers);
1660
1661 // Set the degree of MT processing here. If the discovery was done MT,
1662 // the number of threads involved during discovery could differ from
1663 // the number of active workers. This is OK as long as the discovered
1664 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1665 rp->set_active_mt_degree(active_workers);
1666
1667 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1668
1669 // Process the weak references.
1670 const ReferenceProcessorStats& stats =
1671 rp->process_discovered_references(&g1_is_alive,
1672 &g1_keep_alive,
1673 &g1_drain_mark_stack,
1674 executor,
1675 &pt);
1676 _gc_tracer_cm->report_gc_reference_stats(stats);
1677 pt.print_all_references();
1678
1679 // The do_oop work routines of the keep_alive and drain_marking_stack
1680 // oop closures will set the has_overflown flag if we overflow the
1681 // global marking stack.
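// If the stack did overflow during reference processing, g1_is_alive can no
// longer be trusted; weak_refs_work() returns early below (right after weak
// processing) without attempting class unloading or cleanup.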
1682
1683 assert(has_overflown() || _global_mark_stack.is_empty(),
1684 "Mark stack should be empty (unless it has overflown)");
1685
1686 assert(rp->num_q() == active_workers, "why not");
1687
1688 rp->enqueue_discovered_references(executor, &pt);
1689
1690 rp->verify_no_references_recorded();
1691
1692 pt.print_enqueue_phase();
1693
1694 assert(!rp->discovery_enabled(), "Post condition");
1695 }
1696
1697 assert(has_overflown() || _global_mark_stack.is_empty(),
1698 "Mark stack should be empty (unless it has overflown)");
1699
1700 {
1701 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1702 WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
1703 }
1704
1705 if (has_overflown()) {
1706 // We cannot trust g1_is_alive if the marking stack overflowed
1707 return;
1708 }
1709
1710 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1711
1712 // Unload Klasses, String, Symbols, Code Cache, etc.
1713 if (ClassUnloadingWithConcurrentMark) {
1714 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1715 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1716 _g1h->complete_cleaning(&g1_is_alive, purged_classes);
1717 } else {
1718 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1719 // No need to clean string table and symbol table as they are treated as strong roots when
1720 // class unloading is disabled.
1721 _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1722 }
1723 }
1724
1725 // When sampling object counts, we already swapped the mark bitmaps, so we need to use
1726 // the prev bitmap for determining liveness.
1727 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1728 G1CollectedHeap* _g1h;
1729 public:
1730 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) { }
1731
1732 bool do_object_b(oop obj) {
1733 HeapWord* addr = (HeapWord*)obj;
1734 return addr != NULL &&
1735 (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_dead(obj));
1736 }
1737 };
1738
1739 void G1ConcurrentMark::report_object_count(bool mark_completed) {
1740 // Depending on whether marking has completed, liveness needs to be determined
1741 // using either the next or prev bitmap.
1742 if (mark_completed) {
1743 G1ObjectCountIsAliveClosure is_alive(_g1h);
1744 _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1745 } else {
1746 G1CMIsAliveClosure is_alive(_g1h);
1747 _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1748 }
1749 }
1750
1751
1752 void G1ConcurrentMark::swap_mark_bitmaps() {
1753 G1CMBitMap* temp = _prev_mark_bitmap;
1754 _prev_mark_bitmap = _next_mark_bitmap;
1755 _next_mark_bitmap = temp;
1756 _g1h->collector_state()->set_clearing_next_bitmap(true);
1757 }
1758
1759 // Closure for marking entries in SATB buffers.
1760 class G1CMSATBBufferClosure : public SATBBufferClosure {
1761 private:
1762 G1CMTask* _task;
1763 G1CollectedHeap* _g1h;
1764
1765 // This is very similar to G1CMTask::deal_with_reference, but with
1766 // more relaxed requirements for the argument, so this must be more
1767 // circumspect about treating the argument as an object.
1768 void do_entry(void* entry) const {
1769 _task->increment_refs_reached();
1770 oop const obj = static_cast<oop>(entry);
1771 _task->make_reference_grey(obj);
1772 }
1773
1774 public:
1775 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1776 : _task(task), _g1h(g1h) { }
1777
1778 virtual void do_buffer(void** buffer, size_t size) {
1779 for (size_t i = 0; i < size; ++i) {
1780 do_entry(buffer[i]);
1781 }
1782 }
1783 };
1784
1785 class G1RemarkThreadsClosure : public ThreadClosure {
1786 G1CMSATBBufferClosure _cm_satb_cl;
1787 G1CMOopClosure _cm_cl;
1788 MarkingCodeBlobClosure _code_cl;
1789 int _thread_parity;
1790
1791 public:
1792 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1793 _cm_satb_cl(task, g1h),
1794 _cm_cl(g1h, task),
1795 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1796 _thread_parity(Threads::thread_claim_parity()) {}
1797
1798 void do_thread(Thread* thread) {
1799 if (thread->is_Java_thread()) {
1800 if (thread->claim_oops_do(true, _thread_parity)) {
1801 JavaThread* jt = (JavaThread*)thread;
1802
1803 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking
1804 // however the oops reachable from nmethods have very complex lifecycles:
1805 // * Alive if on the stack of an executing method
1806 // * Weakly reachable otherwise
1807 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1808 // live by the SATB invariant but other oops recorded in nmethods may behave differently.
1809 jt->nmethods_do(&_code_cl);
1810
1811 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1812 }
1813 } else if (thread->is_VM_thread()) {
1814 if (thread->claim_oops_do(true, _thread_parity)) {
1815 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1816 }
1817 }
1818 }
1819 };
1820
1821 class G1CMRemarkTask : public AbstractGangTask {
1822 G1ConcurrentMark* _cm;
1823 public:
1824 void work(uint worker_id) {
1825 G1CMTask* task = _cm->task(worker_id);
1826 task->record_start_time();
1827 {
1828 ResourceMark rm;
1829 HandleMark hm;
1830
1831 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1832 Threads::threads_do(&threads_f);
1833 }
1834
1835 do {
1836 task->do_marking_step(1000000000.0 /* something very large */,
1837 true /* do_termination */,
1838 false /* is_serial */);
1839 } while (task->has_aborted() && !_cm->has_overflown());
1840 // If we overflow, then we do not want to restart. We instead
1841 // want to abort remark and do concurrent marking again.
1842 task->record_end_time();
1843 }
1844
1845 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1846 AbstractGangTask("Par Remark"), _cm(cm) {
1847 _cm->terminator()->reset_for_reuse(active_workers);
1848 }
1849 };
1850
1851 void G1ConcurrentMark::finalize_marking() {
1852 ResourceMark rm;
1853 HandleMark hm;
1854
1855 _g1h->ensure_parsability(false);
1856
1857 // this is remark, so we'll use up all active threads
1858 uint active_workers = _g1h->workers()->active_workers();
1859 set_concurrency_and_phase(active_workers, false /* concurrent */);
1860 // Leave _parallel_marking_threads at its
1861 // value originally calculated in the G1ConcurrentMark
1862 // constructor and pass values of the active workers
1863 // through the gang in the task.
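// Each remark worker (see G1RemarkThreadsClosure and G1CMRemarkTask above) first
// scans its share of thread roots (pending SATB buffers and, for Java threads,
// nmethods) and then completes its marking step inside the StrongRootsScope below.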
1864
1865 {
1866 StrongRootsScope srs(active_workers);
1867
1868 G1CMRemarkTask remarkTask(this, active_workers);
1869 // We will start all available threads, even if we decide that the
1870 // active_workers will be fewer. The extra ones will just bail out
1871 // immediately.
1872 _g1h->workers()->run_task(&remarkTask);
1873 }
1874
1875 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1876 guarantee(has_overflown() ||
1877 satb_mq_set.completed_buffers_num() == 0,
1878 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1879 BOOL_TO_STR(has_overflown()),
1880 satb_mq_set.completed_buffers_num());
1881
1882 print_stats();
1883 }
1884
1885 void G1ConcurrentMark::flush_all_task_caches() {
1886 size_t hits = 0;
1887 size_t misses = 0;
1888 for (uint i = 0; i < _max_num_tasks; i++) {
1889 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1890 hits += stats.first;
1891 misses += stats.second;
1892 }
1893 size_t sum = hits + misses;
1894 log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
1895 hits, misses, percent_of(hits, sum));
1896 }
1897
1898 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1899 _prev_mark_bitmap->clear_range(mr);
1900 }
1901
1902 HeapRegion*
1903 G1ConcurrentMark::claim_region(uint worker_id) {
1904 // "checkpoint" the finger
1905 HeapWord* finger = _finger;
1906
1907 while (finger < _heap.end()) {
1908 assert(_g1h->is_in_g1_reserved(finger), "invariant");
1909
1910 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1911 // Make sure that the reads below do not float before loading curr_region.
1912 OrderAccess::loadload();
1913 // heap_region_containing() above may return NULL as we always scan and claim
1914 // regions until the end of the heap. In this case, just jump to the next region.
1915 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1916
1917 // Is the gap between reading the finger and doing the CAS too long?
1918 HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
1919 if (res == finger && curr_region != NULL) {
1920 // we succeeded
1921 HeapWord* bottom = curr_region->bottom();
1922 HeapWord* limit = curr_region->next_top_at_mark_start();
1923
1924 // Notice that _finger == end cannot be guaranteed here since
1925 // someone else might have moved the finger even further.
1926 assert(_finger >= end, "the finger should have moved forward");
1927
1928 if (limit > bottom) {
1929 return curr_region;
1930 } else {
1931 assert(limit == bottom,
1932 "the region limit should be at bottom");
1933 // we return NULL and the caller should try calling
1934 // claim_region() again.
1935 return NULL; 1936 } 1937 } else { 1938 assert(_finger > finger, "the finger should have moved forward"); 1939 // read it again 1940 finger = _finger; 1941 } 1942 } 1943 1944 return NULL; 1945 } 1946 1947 #ifndef PRODUCT 1948 class VerifyNoCSetOops { 1949 G1CollectedHeap* _g1h; 1950 const char* _phase; 1951 int _info; 1952 1953 public: 1954 VerifyNoCSetOops(const char* phase, int info = -1) : 1955 _g1h(G1CollectedHeap::heap()), 1956 _phase(phase), 1957 _info(info) 1958 { } 1959 1960 void operator()(G1TaskQueueEntry task_entry) const { 1961 if (task_entry.is_array_slice()) { 1962 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1963 return; 1964 } 1965 guarantee(oopDesc::is_oop(task_entry.obj()), 1966 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1967 p2i(task_entry.obj()), _phase, _info); 1968 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1969 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1970 p2i(task_entry.obj()), _phase, _info); 1971 } 1972 }; 1973 1974 void G1ConcurrentMark::verify_no_cset_oops() { 1975 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1976 if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) { 1977 return; 1978 } 1979 1980 // Verify entries on the global mark stack 1981 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 1982 1983 // Verify entries on the task queues 1984 for (uint i = 0; i < _max_num_tasks; ++i) { 1985 G1CMTaskQueue* queue = _task_queues->queue(i); 1986 queue->iterate(VerifyNoCSetOops("Queue", i)); 1987 } 1988 1989 // Verify the global finger 1990 HeapWord* global_finger = finger(); 1991 if (global_finger != NULL && global_finger < _heap.end()) { 1992 // Since we always iterate over all regions, we might get a NULL HeapRegion 1993 // here. 1994 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 1995 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 1996 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 1997 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 1998 } 1999 2000 // Verify the task fingers 2001 assert(_num_concurrent_workers <= _max_num_tasks, "sanity"); 2002 for (uint i = 0; i < _num_concurrent_workers; ++i) { 2003 G1CMTask* task = _tasks[i]; 2004 HeapWord* task_finger = task->finger(); 2005 if (task_finger != NULL && task_finger < _heap.end()) { 2006 // See above note on the global finger verification. 2007 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2008 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2009 !task_hr->in_collection_set(), 2010 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2011 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2012 } 2013 } 2014 } 2015 #endif // PRODUCT 2016 2017 void G1ConcurrentMark::rebuild_rem_set_concurrently() { 2018 _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset); 2019 } 2020 2021 void G1ConcurrentMark::print_stats() { 2022 if (!log_is_enabled(Debug, gc, stats)) { 2023 return; 2024 } 2025 log_debug(gc, stats)("---------------------------------------------------------------------"); 2026 for (size_t i = 0; i < _num_active_tasks; ++i) { 2027 _tasks[i]->print_stats(); 2028 log_debug(gc, stats)("---------------------------------------------------------------------"); 2029 } 2030 } 2031 2032 void G1ConcurrentMark::concurrent_cycle_abort() { 2033 if (!cm_thread()->during_cycle() || _has_aborted) { 2034 // We haven't started a concurrent cycle or we have already aborted it. 
No need to do anything.
2035 return;
2036 }
2037
2038 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2039 // concurrent bitmap clearing.
2040 {
2041 GCTraceTime(Debug, gc) debug("Clear Next Bitmap");
2042 clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
2043 }
2044 // Note we cannot clear the previous marking bitmap here
2045 // since VerifyDuringGC verifies the objects marked during
2046 // a full GC against the previous bitmap.
2047
2048 // Empty mark stack
2049 reset_marking_for_restart();
2050 for (uint i = 0; i < _max_num_tasks; ++i) {
2051 _tasks[i]->clear_region_fields();
2052 }
2053 _first_overflow_barrier_sync.abort();
2054 _second_overflow_barrier_sync.abort();
2055 _has_aborted = true;
2056
2057 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2058 satb_mq_set.abandon_partial_marking();
2059 // This can be called either during or outside marking; we'll read
2060 // the expected_active value from the SATB queue set.
2061 satb_mq_set.set_active_all_threads(
2062 false, /* new active value */
2063 satb_mq_set.is_active() /* expected_active */);
2064 }
2065
2066 static void print_ms_time_info(const char* prefix, const char* name,
2067 NumberSeq& ns) {
2068 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2069 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2070 if (ns.num() > 0) {
2071 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2072 prefix, ns.sd(), ns.maximum());
2073 }
2074 }
2075
2076 void G1ConcurrentMark::print_summary_info() {
2077 Log(gc, marking) log;
2078 if (!log.is_trace()) {
2079 return;
2080 }
2081
2082 log.trace(" Concurrent marking:");
2083 print_ms_time_info(" ", "init marks", _init_times);
2084 print_ms_time_info(" ", "remarks", _remark_times);
2085 {
2086 print_ms_time_info(" ", "final marks", _remark_mark_times);
2087 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2088
2089 }
2090 print_ms_time_info(" ", "cleanups", _cleanup_times);
2091 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2092 _total_cleanup_time, (_cleanup_times.num() > 0 ?
_total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2093 log.trace(" Total stop_world time = %8.2f s.", 2094 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2095 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2096 cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum()); 2097 } 2098 2099 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2100 _concurrent_workers->print_worker_threads_on(st); 2101 } 2102 2103 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2104 _concurrent_workers->threads_do(tc); 2105 } 2106 2107 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2108 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2109 p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap)); 2110 _prev_mark_bitmap->print_on_error(st, " Prev Bits: "); 2111 _next_mark_bitmap->print_on_error(st, " Next Bits: "); 2112 } 2113 2114 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2115 ReferenceProcessor* result = g1h->ref_processor_cm(); 2116 assert(result != NULL, "CM reference processor should not be NULL"); 2117 return result; 2118 } 2119 2120 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2121 G1CMTask* task) 2122 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2123 _g1h(g1h), _task(task) 2124 { } 2125 2126 void G1CMTask::setup_for_region(HeapRegion* hr) { 2127 assert(hr != NULL, 2128 "claim_region() should have filtered out NULL regions"); 2129 _curr_region = hr; 2130 _finger = hr->bottom(); 2131 update_region_limit(); 2132 } 2133 2134 void G1CMTask::update_region_limit() { 2135 HeapRegion* hr = _curr_region; 2136 HeapWord* bottom = hr->bottom(); 2137 HeapWord* limit = hr->next_top_at_mark_start(); 2138 2139 if (limit == bottom) { 2140 // The region was collected underneath our feet. 2141 // We set the finger to bottom to ensure that the bitmap 2142 // iteration that will follow this will not do anything. 2143 // (this is not a condition that holds when we set the region up, 2144 // as the region is not supposed to be empty in the first place) 2145 _finger = bottom; 2146 } else if (limit >= _region_limit) { 2147 assert(limit >= _finger, "peace of mind"); 2148 } else { 2149 assert(limit < _region_limit, "only way to get here"); 2150 // This can happen under some pretty unusual circumstances. An 2151 // evacuation pause empties the region underneath our feet (NTAMS 2152 // at bottom). We then do some allocation in the region (NTAMS 2153 // stays at bottom), followed by the region being used as a GC 2154 // alloc region (NTAMS will move to top() and the objects 2155 // originally below it will be grayed). All objects now marked in 2156 // the region are explicitly grayed, if below the global finger, 2157 // and we do not need in fact to scan anything else. So, we simply 2158 // set _finger to be limit to ensure that the bitmap iteration 2159 // doesn't do anything. 2160 _finger = limit; 2161 } 2162 2163 _region_limit = limit; 2164 } 2165 2166 void G1CMTask::giveup_current_region() { 2167 assert(_curr_region != NULL, "invariant"); 2168 clear_region_fields(); 2169 } 2170 2171 void G1CMTask::clear_region_fields() { 2172 // Values for these three fields that indicate that we're not 2173 // holding on to a region. 
2174 _curr_region = NULL;
2175 _finger = NULL;
2176 _region_limit = NULL;
2177 }
2178
2179 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2180 if (cm_oop_closure == NULL) {
2181 assert(_cm_oop_closure != NULL, "invariant");
2182 } else {
2183 assert(_cm_oop_closure == NULL, "invariant");
2184 }
2185 _cm_oop_closure = cm_oop_closure;
2186 }
2187
2188 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2189 guarantee(next_mark_bitmap != NULL, "invariant");
2190 _next_mark_bitmap = next_mark_bitmap;
2191 clear_region_fields();
2192
2193 _calls = 0;
2194 _elapsed_time_ms = 0.0;
2195 _termination_time_ms = 0.0;
2196 _termination_start_time_ms = 0.0;
2197
2198 _mark_stats_cache.reset();
2199 }
2200
2201 bool G1CMTask::should_exit_termination() {
2202 regular_clock_call();
2203 // This is called when we are in the termination protocol. We should
2204 // quit if, for some reason, this task wants to abort or the global
2205 // stack is not empty (this means that we can get work from it).
2206 return !_cm->mark_stack_empty() || has_aborted();
2207 }
2208
2209 void G1CMTask::reached_limit() {
2210 assert(_words_scanned >= _words_scanned_limit ||
2211 _refs_reached >= _refs_reached_limit ,
2212 "shouldn't have been called otherwise");
2213 regular_clock_call();
2214 }
2215
2216 void G1CMTask::regular_clock_call() {
2217 if (has_aborted()) {
2218 return;
2219 }
2220
2221 // First, we need to recalculate the words scanned and refs reached
2222 // limits for the next clock call.
2223 recalculate_limits();
2224
2225 // During the regular clock call we do the following:
2226
2227 // (1) If an overflow has been flagged, then we abort.
2228 if (_cm->has_overflown()) {
2229 set_has_aborted();
2230 return;
2231 }
2232
2233 // If we are not concurrent (i.e. we're doing remark) we don't need
2234 // to check anything else. The other steps are only needed during
2235 // the concurrent marking phase.
2236 if (!_cm->concurrent()) {
2237 return;
2238 }
2239
2240 // (2) If marking has been aborted for Full GC, then we also abort.
2241 if (_cm->has_aborted()) {
2242 set_has_aborted();
2243 return;
2244 }
2245
2246 double curr_time_ms = os::elapsedVTime() * 1000.0;
2247
2248 // (3) We check whether we should yield. If we have to, then we abort.
2249 if (SuspendibleThreadSet::should_yield()) {
2250 // We should yield. To do this we abort the task. The caller is
2251 // responsible for yielding.
2252 set_has_aborted();
2253 return;
2254 }
2255
2256 // (4) We check whether we've reached our time quota. If we have,
2257 // then we abort.
2258 double elapsed_time_ms = curr_time_ms - _start_time_ms;
2259 if (elapsed_time_ms > _time_target_ms) {
2260 set_has_aborted();
2261 _has_timed_out = true;
2262 return;
2263 }
2264
2265 // (5) Finally, we check whether there are enough completed SATB
2266 // buffers available for processing. If there are, we abort.
2267 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2268 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2269 // we do need to process SATB buffers, we'll abort and restart 2270 // the marking task to do so 2271 set_has_aborted(); 2272 return; 2273 } 2274 } 2275 2276 void G1CMTask::recalculate_limits() { 2277 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2278 _words_scanned_limit = _real_words_scanned_limit; 2279 2280 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2281 _refs_reached_limit = _real_refs_reached_limit; 2282 } 2283 2284 void G1CMTask::decrease_limits() { 2285 // This is called when we believe that we're going to do an infrequent 2286 // operation which will increase the per byte scanned cost (i.e. move 2287 // entries to/from the global stack). It basically tries to decrease the 2288 // scanning limit so that the clock is called earlier. 2289 2290 _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4; 2291 _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4; 2292 } 2293 2294 void G1CMTask::move_entries_to_global_stack() { 2295 // Local array where we'll store the entries that will be popped 2296 // from the local queue. 2297 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2298 2299 size_t n = 0; 2300 G1TaskQueueEntry task_entry; 2301 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) { 2302 buffer[n] = task_entry; 2303 ++n; 2304 } 2305 if (n < G1CMMarkStack::EntriesPerChunk) { 2306 buffer[n] = G1TaskQueueEntry(); 2307 } 2308 2309 if (n > 0) { 2310 if (!_cm->mark_stack_push(buffer)) { 2311 set_has_aborted(); 2312 } 2313 } 2314 2315 // This operation was quite expensive, so decrease the limits. 2316 decrease_limits(); 2317 } 2318 2319 bool G1CMTask::get_entries_from_global_stack() { 2320 // Local array where we'll store the entries that will be popped 2321 // from the global stack. 2322 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2323 2324 if (!_cm->mark_stack_pop(buffer)) { 2325 return false; 2326 } 2327 2328 // We did actually pop at least one entry. 2329 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) { 2330 G1TaskQueueEntry task_entry = buffer[i]; 2331 if (task_entry.is_null()) { 2332 break; 2333 } 2334 assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj())); 2335 bool success = _task_queue->push(task_entry); 2336 // We only call this when the local queue is empty or under a 2337 // given target limit. So, we do not expect this push to fail. 2338 assert(success, "invariant"); 2339 } 2340 2341 // This operation was quite expensive, so decrease the limits 2342 decrease_limits(); 2343 return true; 2344 } 2345 2346 void G1CMTask::drain_local_queue(bool partially) { 2347 if (has_aborted()) { 2348 return; 2349 } 2350 2351 // Decide what the target size is, depending whether we're going to 2352 // drain it partially (so that other tasks can steal if they run out 2353 // of things to do) or totally (at the very end). 
2354 size_t target_size;
2355 if (partially) {
2356 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2357 } else {
2358 target_size = 0;
2359 }
2360
2361 if (_task_queue->size() > target_size) {
2362 G1TaskQueueEntry entry;
2363 bool ret = _task_queue->pop_local(entry);
2364 while (ret) {
2365 scan_task_entry(entry);
2366 if (_task_queue->size() <= target_size || has_aborted()) {
2367 ret = false;
2368 } else {
2369 ret = _task_queue->pop_local(entry);
2370 }
2371 }
2372 }
2373 }
2374
2375 void G1CMTask::drain_global_stack(bool partially) {
2376 if (has_aborted()) {
2377 return;
2378 }
2379
2380 // We have a policy to drain the local queue before we attempt to
2381 // drain the global stack.
2382 assert(partially || _task_queue->size() == 0, "invariant");
2383
2384 // Decide what the target size is, depending on whether we're going to
2385 // drain it partially (so that other tasks can steal if they run out
2386 // of things to do) or totally (at the very end).
2387 // Notice that when draining the global mark stack partially, due to the raciness
2388 // of the mark stack size update we might in fact drop below the target. But,
2389 // this is not a problem.
2390 // In case of total draining, we simply process until the global mark stack is
2391 // totally empty, disregarding the size counter.
2392 if (partially) {
2393 size_t const target_size = _cm->partial_mark_stack_size_target();
2394 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2395 if (get_entries_from_global_stack()) {
2396 drain_local_queue(partially);
2397 }
2398 }
2399 } else {
2400 while (!has_aborted() && get_entries_from_global_stack()) {
2401 drain_local_queue(partially);
2402 }
2403 }
2404 }
2405
2406 // The SATB queue has several assumptions on whether to call the par or
2407 // non-par versions of the methods. This is why some of the code is
2408 // replicated. We should really get rid of the single-threaded version
2409 // of the code to simplify things.
2410 void G1CMTask::drain_satb_buffers() {
2411 if (has_aborted()) {
2412 return;
2413 }
2414
2415 // We set this so that the regular clock knows that we're in the
2416 // middle of draining buffers and doesn't set the abort flag when it
2417 // notices that SATB buffers are available for draining. It'd be
2418 // very counterproductive if it did that. :-)
2419 _draining_satb_buffers = true;
2420
2421 G1CMSATBBufferClosure satb_cl(this, _g1h);
2422 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2423
2424 // This keeps claiming and applying the closure to completed buffers
2425 // until we run out of buffers or we need to abort.
2426 while (!has_aborted() && 2427 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2428 regular_clock_call(); 2429 } 2430 2431 _draining_satb_buffers = false; 2432 2433 assert(has_aborted() || 2434 _cm->concurrent() || 2435 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2436 2437 // again, this was a potentially expensive operation, decrease the 2438 // limits to get the regular clock call early 2439 decrease_limits(); 2440 } 2441 2442 void G1CMTask::clear_mark_stats_cache(uint region_idx) { 2443 _mark_stats_cache.reset(region_idx); 2444 } 2445 2446 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() { 2447 return _mark_stats_cache.evict_all(); 2448 } 2449 2450 void G1CMTask::print_stats() { 2451 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls); 2452 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2453 _elapsed_time_ms, _termination_time_ms); 2454 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms", 2455 _step_times_ms.num(), 2456 _step_times_ms.avg(), 2457 _step_times_ms.sd(), 2458 _step_times_ms.maximum(), 2459 _step_times_ms.sum()); 2460 size_t const hits = _mark_stats_cache.hits(); 2461 size_t const misses = _mark_stats_cache.misses(); 2462 log_debug(gc, stats)(" Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f", 2463 hits, misses, percent_of(hits, hits + misses)); 2464 } 2465 2466 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2467 return _task_queues->steal(worker_id, hash_seed, task_entry); 2468 } 2469 2470 /***************************************************************************** 2471 2472 The do_marking_step(time_target_ms, ...) method is the building 2473 block of the parallel marking framework. It can be called in parallel 2474 with other invocations of do_marking_step() on different tasks 2475 (but only one per task, obviously) and concurrently with the 2476 mutator threads, or during remark, hence it eliminates the need 2477 for two versions of the code. When called during remark, it will 2478 pick up from where the task left off during the concurrent marking 2479 phase. Interestingly, tasks are also claimable during evacuation 2480 pauses too, since do_marking_step() ensures that it aborts before 2481 it needs to yield. 2482 2483 The data structures that it uses to do marking work are the 2484 following: 2485 2486 (1) Marking Bitmap. If there are gray objects that appear only 2487 on the bitmap (this happens either when dealing with an overflow 2488 or when the initial marking phase has simply marked the roots 2489 and didn't push them on the stack), then tasks claim heap 2490 regions whose bitmap they then scan to find gray objects. A 2491 global finger indicates where the end of the last claimed region 2492 is. A local finger indicates how far into the region a task has 2493 scanned. The two fingers are used to determine how to gray an 2494 object (i.e. whether simply marking it is OK, as it will be 2495 visited by a task in the future, or whether it needs to be also 2496 pushed on a stack). 2497 2498 (2) Local Queue. The local queue of the task which is accessed 2499 reasonably efficiently by the task. Other tasks can steal from 2500 it when they run out of work. Throughout the marking phase, a 2501 task attempts to keep its local queue short but not totally 2502 empty, so that entries are available for stealing by other 2503 tasks. 
Only when there is no more work will a task totally
2504 drain its local queue.
2505
2506 (3) Global Mark Stack. This handles local queue overflow. During
2507 marking only sets of entries are moved between it and the local
2508 queues, as access to it requires a mutex and more fine-grain
2509 interaction with it which might cause contention. If it
2510 overflows, then the marking phase should restart and iterate
2511 over the bitmap to identify gray objects. Throughout the marking
2512 phase, tasks attempt to keep the global mark stack at a small
2513 length but not totally empty, so that entries are available for
2514 popping by other tasks. Only when there is no more work will tasks
2515 totally drain the global mark stack.
2516
2517 (4) SATB Buffer Queue. This is where completed SATB buffers are
2518 made available. Buffers are regularly removed from this queue
2519 and scanned for roots, so that the queue doesn't get too
2520 long. During remark, all completed buffers are processed, as
2521 well as the filled in parts of any uncompleted buffers.
2522
2523 The do_marking_step() method tries to abort when the time target
2524 has been reached. There are a few other cases when the
2525 do_marking_step() method also aborts:
2526
2527 (1) When the marking phase has been aborted (after a Full GC).
2528
2529 (2) When a global overflow (on the global stack) has been
2530 triggered. Before the task aborts, it will actually sync up with
2531 the other tasks to ensure that all the marking data structures
2532 (local queues, stacks, fingers etc.) are re-initialized so that
2533 when do_marking_step() completes, the marking phase can
2534 immediately restart.
2535
2536 (3) When enough completed SATB buffers are available. The
2537 do_marking_step() method only tries to drain SATB buffers right
2538 at the beginning. So, if enough buffers are available, the
2539 marking step aborts and the SATB buffers are processed at
2540 the beginning of the next invocation.
2541
2542 (4) To yield. When we have to yield, we abort and yield
2543 right at the end of do_marking_step(). This saves us from a lot
2544 of hassle as, by yielding, we might allow a Full GC. If this
2545 happens then objects will be compacted underneath our feet, the
2546 heap might shrink, etc. We save checking for this by just
2547 aborting and doing the yield right at the end.
2548
2549 From the above it follows that the do_marking_step() method should
2550 be called in a loop (or, otherwise, regularly) until it completes.
2551
2552 If a marking step completes without its has_aborted() flag being
2553 true, it means it has completed the current marking phase (and
2554 also all other marking tasks have done so and have all synced up).
2555
2556 A method called regular_clock_call() is invoked "regularly" (in
2557 sub-ms intervals) throughout marking. It is this clock method that
2558 checks all the abort conditions which were mentioned above and
2559 decides when the task should abort. A work-based scheme is used to
2560 trigger this clock method: when the number of object words the
2561 marking phase has scanned or the number of references the marking
2562 phase has visited reach a given limit. Additional invocations of
2563 the clock method have been planted in a few other strategic places
2564 too. The initial reason for the clock method was to avoid calling
2565 vtime too regularly, as it is quite expensive.
So, once it was in 2566 place, it was natural to piggy-back all the other conditions on it 2567 too and not constantly check them throughout the code. 2568 2569 If do_termination is true then do_marking_step will enter its 2570 termination protocol. 2571 2572 The value of is_serial must be true when do_marking_step is being 2573 called serially (i.e. by the VMThread) and do_marking_step should 2574 skip any synchronization in the termination and overflow code. 2575 Examples include the serial remark code and the serial reference 2576 processing closures. 2577 2578 The value of is_serial must be false when do_marking_step is 2579 being called by any of the worker threads in a work gang. 2580 Examples include the concurrent marking code (CMMarkingTask), 2581 the MT remark code, and the MT reference processing closures. 2582 2583 *****************************************************************************/ 2584 2585 void G1CMTask::do_marking_step(double time_target_ms, 2586 bool do_termination, 2587 bool is_serial) { 2588 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2589 2590 _start_time_ms = os::elapsedVTime() * 1000.0; 2591 2592 // If do_stealing is true then do_marking_step will attempt to 2593 // steal work from the other G1CMTasks. It only makes sense to 2594 // enable stealing when the termination protocol is enabled 2595 // and do_marking_step() is not being called serially. 2596 bool do_stealing = do_termination && !is_serial; 2597 2598 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2599 _time_target_ms = time_target_ms - diff_prediction_ms; 2600 2601 // set up the variables that are used in the work-based scheme to 2602 // call the regular clock method 2603 _words_scanned = 0; 2604 _refs_reached = 0; 2605 recalculate_limits(); 2606 2607 // clear all flags 2608 clear_has_aborted(); 2609 _has_timed_out = false; 2610 _draining_satb_buffers = false; 2611 2612 ++_calls; 2613 2614 // Set up the bitmap and oop closures. Anything that uses them is 2615 // eventually called from this method, so it is OK to allocate these 2616 // statically. 2617 G1CMBitMapClosure bitmap_closure(this, _cm); 2618 G1CMOopClosure cm_oop_closure(_g1h, this); 2619 set_cm_oop_closure(&cm_oop_closure); 2620 2621 if (_cm->has_overflown()) { 2622 // This can happen if the mark stack overflows during a GC pause 2623 // and this task, after a yield point, restarts. We have to abort 2624 // as we need to get into the overflow protocol which happens 2625 // right at the end of this task. 2626 set_has_aborted(); 2627 } 2628 2629 // First drain any available SATB buffers. After this, we will not 2630 // look at SATB buffers before the next invocation of this method. 2631 // If enough completed SATB buffers are queued up, the regular clock 2632 // will abort this task so that it restarts. 2633 drain_satb_buffers(); 2634 // ...then partially drain the local queue and the global stack 2635 drain_local_queue(true); 2636 drain_global_stack(true); 2637 2638 do { 2639 if (!has_aborted() && _curr_region != NULL) { 2640 // This means that we're already holding on to a region. 2641 assert(_finger != NULL, "if region is not NULL, then the finger " 2642 "should not be NULL either"); 2643 2644 // We might have restarted this task after an evacuation pause 2645 // which might have evacuated the region we're holding on to 2646 // underneath our feet. 
Let's read its limit again to make sure 2647 // that we do not iterate over a region of the heap that 2648 // contains garbage (update_region_limit() will also move 2649 // _finger to the start of the region if it is found empty). 2650 update_region_limit(); 2651 // We will start from _finger not from the start of the region, 2652 // as we might be restarting this task after aborting half-way 2653 // through scanning this region. In this case, _finger points to 2654 // the address where we last found a marked object. If this is a 2655 // fresh region, _finger points to start(). 2656 MemRegion mr = MemRegion(_finger, _region_limit); 2657 2658 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2659 "humongous regions should go around loop once only"); 2660 2661 // Some special cases: 2662 // If the memory region is empty, we can just give up the region. 2663 // If the current region is humongous then we only need to check 2664 // the bitmap for the bit associated with the start of the object, 2665 // scan the object if it's live, and give up the region. 2666 // Otherwise, let's iterate over the bitmap of the part of the region 2667 // that is left. 2668 // If the iteration is successful, give up the region. 2669 if (mr.is_empty()) { 2670 giveup_current_region(); 2671 regular_clock_call(); 2672 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2673 if (_next_mark_bitmap->is_marked(mr.start())) { 2674 // The object is marked - apply the closure 2675 bitmap_closure.do_addr(mr.start()); 2676 } 2677 // Even if this task aborted while scanning the humongous object 2678 // we can (and should) give up the current region. 2679 giveup_current_region(); 2680 regular_clock_call(); 2681 } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) { 2682 giveup_current_region(); 2683 regular_clock_call(); 2684 } else { 2685 assert(has_aborted(), "currently the only way to do so"); 2686 // The only way to abort the bitmap iteration is to return 2687 // false from the do_bit() method. However, inside the 2688 // do_bit() method we move the _finger to point to the 2689 // object currently being looked at. So, if we bail out, we 2690 // have definitely set _finger to something non-null. 2691 assert(_finger != NULL, "invariant"); 2692 2693 // Region iteration was actually aborted. So now _finger 2694 // points to the address of the object we last scanned. If we 2695 // leave it there, when we restart this task, we will rescan 2696 // the object. It is easy to avoid this. We move the finger by 2697 // enough to point to the next possible object header. 2698 assert(_finger < _region_limit, "invariant"); 2699 HeapWord* const new_finger = _finger + ((oop)_finger)->size(); 2700 // Check if bitmap iteration was aborted while scanning the last object 2701 if (new_finger >= _region_limit) { 2702 giveup_current_region(); 2703 } else { 2704 move_finger_to(new_finger); 2705 } 2706 } 2707 } 2708 // At this point we have either completed iterating over the 2709 // region we were holding on to, or we have aborted. 2710 2711 // We then partially drain the local queue and the global stack. 2712 // (Do we really need this?) 2713 drain_local_queue(true); 2714 drain_global_stack(true); 2715 2716 // Read the note on the claim_region() method on why it might 2717 // return NULL with potentially more regions available for 2718 // claiming and why we have to check out_of_regions() to determine 2719 // whether we're done or not. 
2720 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2721 // We are going to try to claim a new region. We should have 2722 // given up on the previous one. 2723 // Separated the asserts so that we know which one fires. 2724 assert(_curr_region == NULL, "invariant"); 2725 assert(_finger == NULL, "invariant"); 2726 assert(_region_limit == NULL, "invariant"); 2727 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2728 if (claimed_region != NULL) { 2729 // Yes, we managed to claim one 2730 setup_for_region(claimed_region); 2731 assert(_curr_region == claimed_region, "invariant"); 2732 } 2733 // It is important to call the regular clock here. It might take 2734 // a while to claim a region if, for example, we hit a large 2735 // block of empty regions. So we need to call the regular clock 2736 // method once round the loop to make sure it's called 2737 // frequently enough. 2738 regular_clock_call(); 2739 } 2740 2741 if (!has_aborted() && _curr_region == NULL) { 2742 assert(_cm->out_of_regions(), 2743 "at this point we should be out of regions"); 2744 } 2745 } while ( _curr_region != NULL && !has_aborted()); 2746 2747 if (!has_aborted()) { 2748 // We cannot check whether the global stack is empty, since other 2749 // tasks might be pushing objects to it concurrently. 2750 assert(_cm->out_of_regions(), 2751 "at this point we should be out of regions"); 2752 // Try to reduce the number of available SATB buffers so that 2753 // remark has less work to do. 2754 drain_satb_buffers(); 2755 } 2756 2757 // Since we've done everything else, we can now totally drain the 2758 // local queue and global stack. 2759 drain_local_queue(false); 2760 drain_global_stack(false); 2761 2762 // Attempt at work stealing from other task's queues. 2763 if (do_stealing && !has_aborted()) { 2764 // We have not aborted. This means that we have finished all that 2765 // we could. Let's try to do some stealing... 2766 2767 // We cannot check whether the global stack is empty, since other 2768 // tasks might be pushing objects to it concurrently. 2769 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2770 "only way to reach here"); 2771 while (!has_aborted()) { 2772 G1TaskQueueEntry entry; 2773 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2774 scan_task_entry(entry); 2775 2776 // And since we're towards the end, let's totally drain the 2777 // local queue and global stack. 2778 drain_local_queue(false); 2779 drain_global_stack(false); 2780 } else { 2781 break; 2782 } 2783 } 2784 } 2785 2786 // We still haven't aborted. Now, let's try to get into the 2787 // termination protocol. 2788 if (do_termination && !has_aborted()) { 2789 // We cannot check whether the global stack is empty, since other 2790 // tasks might be concurrently pushing objects on it. 2791 // Separated the asserts so that we know which one fires. 2792 assert(_cm->out_of_regions(), "only way to reach here"); 2793 assert(_task_queue->size() == 0, "only way to reach here"); 2794 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2795 2796 // The G1CMTask class also extends the TerminatorTerminator class, 2797 // hence its should_exit_termination() method will also decide 2798 // whether to exit the termination protocol or not. 
2799 bool finished = (is_serial ||
2800 _cm->terminator()->offer_termination(this));
2801 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2802 _termination_time_ms +=
2803 termination_end_time_ms - _termination_start_time_ms;
2804
2805 if (finished) {
2806 // We're all done.
2807
2808 // We can now guarantee that the global stack is empty, since
2809 // all other tasks have finished. We separated the guarantees so
2810 // that, if a condition is false, we can immediately find out
2811 // which one.
2812 guarantee(_cm->out_of_regions(), "only way to reach here");
2813 guarantee(_cm->mark_stack_empty(), "only way to reach here");
2814 guarantee(_task_queue->size() == 0, "only way to reach here");
2815 guarantee(!_cm->has_overflown(), "only way to reach here");
2816 } else {
2817 // Apparently there's more work to do. Let's abort this task. The caller
2818 // will restart it and we can hopefully find more things to do.
2819 set_has_aborted();
2820 }
2821 }
2822
2823 // Mainly for debugging purposes to make sure that a pointer to the
2824 // closure which was statically allocated in this frame doesn't
2825 // escape it by accident.
2826 set_cm_oop_closure(NULL);
2827 double end_time_ms = os::elapsedVTime() * 1000.0;
2828 double elapsed_time_ms = end_time_ms - _start_time_ms;
2829 // Update the step history.
2830 _step_times_ms.add(elapsed_time_ms);
2831
2832 if (has_aborted()) {
2833 // The task was aborted for some reason.
2834 if (_has_timed_out) {
2835 double diff_ms = elapsed_time_ms - _time_target_ms;
2836 // Keep statistics of how well we did with respect to hitting
2837 // our target only if we actually timed out (if we aborted for
2838 // other reasons, then the results might get skewed).
2839 _marking_step_diffs_ms.add(diff_ms);
2840 }
2841
2842 if (_cm->has_overflown()) {
2843 // This is the interesting one. We aborted because a global
2844 // overflow was raised. This means we have to restart the
2845 // marking phase and start iterating over regions. However, in
2846 // order to do this we have to make sure that all tasks stop
2847 // what they are doing and re-initialize in a safe manner. We
2848 // will achieve this with the use of two barrier sync points.
2849
2850 if (!is_serial) {
2851 // We only need to enter the sync barrier if being called
2852 // from a parallel context
2853 _cm->enter_first_sync_barrier(_worker_id);
2854
2855 // When we exit this sync barrier we know that all tasks have
2856 // stopped doing marking work. So, it's now safe to
2857 // re-initialize our data structures.
2858 }
2859
2860 clear_region_fields();
2861 flush_mark_stats_cache();
2862
2863 if (!is_serial) {
2864 // If we're executing the concurrent phase of marking, reset the marking
2865 // state; otherwise the marking state is reset after reference processing,
2866 // during the remark pause.
2867 // If we reset here as a result of an overflow during the remark we will
2868 // see assertion failures from any subsequent set_concurrency_and_phase()
2869 // calls.
2870 if (_cm->concurrent() && _worker_id == 0) {
2871 // Worker 0 is responsible for clearing the global data structures because
2872 // of an overflow. During STW we should not clear the overflow flag (in
2873 // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2874 // this method to abort the pause and restart concurrent marking.
2875 _cm->reset_marking_for_restart();
2876
2877 log_info(gc, marking)("Concurrent Mark reset for overflow");
2878 }
2879
2880 // ...and enter the second barrier.
2881 _cm->enter_second_sync_barrier(_worker_id); 2882 } 2883 // At this point, if we're during the concurrent phase of 2884 // marking, everything has been re-initialized and we're 2885 // ready to restart. 2886 } 2887 } 2888 } 2889 2890 G1CMTask::G1CMTask(uint worker_id, 2891 G1ConcurrentMark* cm, 2892 G1CMTaskQueue* task_queue, 2893 G1RegionMarkStats* mark_stats, 2894 uint max_regions) : 2895 _objArray_processor(this), 2896 _worker_id(worker_id), 2897 _g1h(G1CollectedHeap::heap()), 2898 _cm(cm), 2899 _next_mark_bitmap(NULL), 2900 _task_queue(task_queue), 2901 _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize), 2902 _calls(0), 2903 _time_target_ms(0.0), 2904 _start_time_ms(0.0), 2905 _cm_oop_closure(NULL), 2906 _curr_region(NULL), 2907 _finger(NULL), 2908 _region_limit(NULL), 2909 _words_scanned(0), 2910 _words_scanned_limit(0), 2911 _real_words_scanned_limit(0), 2912 _refs_reached(0), 2913 _refs_reached_limit(0), 2914 _real_refs_reached_limit(0), 2915 _hash_seed(17), 2916 _has_aborted(false), 2917 _has_timed_out(false), 2918 _draining_satb_buffers(false), 2919 _step_times_ms(), 2920 _elapsed_time_ms(0.0), 2921 _termination_time_ms(0.0), 2922 _termination_start_time_ms(0.0), 2923 _marking_step_diffs_ms() 2924 { 2925 guarantee(task_queue != NULL, "invariant"); 2926 2927 _marking_step_diffs_ms.add(0.5); 2928 } 2929 2930 // These are formatting macros that are used below to ensure 2931 // consistent formatting. The *_H_* versions are used to format the 2932 // header for a particular value and they should be kept consistent 2933 // with the corresponding macro. Also note that most of the macros add 2934 // the necessary white space (as a prefix) which makes them a bit 2935 // easier to compose. 2936 2937 // All the output lines are prefixed with this string to be able to 2938 // identify them easily in a large log file. 2939 #define G1PPRL_LINE_PREFIX "###" 2940 2941 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 2942 #ifdef _LP64 2943 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 2944 #else // _LP64 2945 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 2946 #endif // _LP64 2947 2948 // For per-region info 2949 #define G1PPRL_TYPE_FORMAT " %-4s" 2950 #define G1PPRL_TYPE_H_FORMAT " %4s" 2951 #define G1PPRL_STATE_FORMAT " %-5s" 2952 #define G1PPRL_STATE_H_FORMAT " %5s" 2953 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 2954 #define G1PPRL_BYTE_H_FORMAT " %9s" 2955 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 2956 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 2957 2958 // For summary info 2959 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 2960 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 2961 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 2962 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 2963 2964 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) : 2965 _total_used_bytes(0), _total_capacity_bytes(0), 2966 _total_prev_live_bytes(0), _total_next_live_bytes(0), 2967 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) 2968 { 2969 if (!log_is_enabled(Trace, gc, liveness)) { 2970 return; 2971 } 2972 2973 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2974 MemRegion g1_reserved = g1h->g1_reserved(); 2975 double now = os::elapsedTime(); 2976 2977 // Print the header of the output. 
2978 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 2979 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 2980 G1PPRL_SUM_ADDR_FORMAT("reserved") 2981 G1PPRL_SUM_BYTE_FORMAT("region-size"), 2982 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 2983 HeapRegion::GrainBytes); 2984 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2985 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2986 G1PPRL_TYPE_H_FORMAT 2987 G1PPRL_ADDR_BASE_H_FORMAT 2988 G1PPRL_BYTE_H_FORMAT 2989 G1PPRL_BYTE_H_FORMAT 2990 G1PPRL_BYTE_H_FORMAT 2991 G1PPRL_DOUBLE_H_FORMAT 2992 G1PPRL_BYTE_H_FORMAT 2993 G1PPRL_STATE_H_FORMAT 2994 G1PPRL_BYTE_H_FORMAT, 2995 "type", "address-range", 2996 "used", "prev-live", "next-live", "gc-eff", 2997 "remset", "state", "code-roots"); 2998 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2999 G1PPRL_TYPE_H_FORMAT 3000 G1PPRL_ADDR_BASE_H_FORMAT 3001 G1PPRL_BYTE_H_FORMAT 3002 G1PPRL_BYTE_H_FORMAT 3003 G1PPRL_BYTE_H_FORMAT 3004 G1PPRL_DOUBLE_H_FORMAT 3005 G1PPRL_BYTE_H_FORMAT 3006 G1PPRL_STATE_H_FORMAT 3007 G1PPRL_BYTE_H_FORMAT, 3008 "", "", 3009 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 3010 "(bytes)", "", "(bytes)"); 3011 } 3012 3013 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) { 3014 if (!log_is_enabled(Trace, gc, liveness)) { 3015 return false; 3016 } 3017 3018 const char* type = r->get_type_str(); 3019 HeapWord* bottom = r->bottom(); 3020 HeapWord* end = r->end(); 3021 size_t capacity_bytes = r->capacity(); 3022 size_t used_bytes = r->used(); 3023 size_t prev_live_bytes = r->live_bytes(); 3024 size_t next_live_bytes = r->next_live_bytes(); 3025 double gc_eff = r->gc_efficiency(); 3026 size_t remset_bytes = r->rem_set()->mem_size(); 3027 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 3028 const char* remset_type = r->rem_set()->get_short_state_str(); 3029 3030 _total_used_bytes += used_bytes; 3031 _total_capacity_bytes += capacity_bytes; 3032 _total_prev_live_bytes += prev_live_bytes; 3033 _total_next_live_bytes += next_live_bytes; 3034 _total_remset_bytes += remset_bytes; 3035 _total_strong_code_roots_bytes += strong_code_roots_bytes; 3036 3037 // Print a line for this particular region. 3038 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3039 G1PPRL_TYPE_FORMAT 3040 G1PPRL_ADDR_BASE_FORMAT 3041 G1PPRL_BYTE_FORMAT 3042 G1PPRL_BYTE_FORMAT 3043 G1PPRL_BYTE_FORMAT 3044 G1PPRL_DOUBLE_FORMAT 3045 G1PPRL_BYTE_FORMAT 3046 G1PPRL_STATE_FORMAT 3047 G1PPRL_BYTE_FORMAT, 3048 type, p2i(bottom), p2i(end), 3049 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 3050 remset_bytes, remset_type, strong_code_roots_bytes); 3051 3052 return false; 3053 } 3054 3055 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 3056 if (!log_is_enabled(Trace, gc, liveness)) { 3057 return; 3058 } 3059 3060 // add static memory usages to remembered set sizes 3061 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3062 // Print the footer of the output. 
3063 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3064 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3065 " SUMMARY" 3066 G1PPRL_SUM_MB_FORMAT("capacity") 3067 G1PPRL_SUM_MB_PERC_FORMAT("used") 3068 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3069 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3070 G1PPRL_SUM_MB_FORMAT("remset") 3071 G1PPRL_SUM_MB_FORMAT("code-roots"), 3072 bytes_to_mb(_total_capacity_bytes), 3073 bytes_to_mb(_total_used_bytes), 3074 percent_of(_total_used_bytes, _total_capacity_bytes), 3075 bytes_to_mb(_total_prev_live_bytes), 3076 percent_of(_total_prev_live_bytes, _total_capacity_bytes), 3077 bytes_to_mb(_total_next_live_bytes), 3078 percent_of(_total_next_live_bytes, _total_capacity_bytes), 3079 bytes_to_mb(_total_remset_bytes), 3080 bytes_to_mb(_total_strong_code_roots_bytes)); 3081 }