/*
 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1CardLiveData.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
#include "utilities/growableArray.hpp"

void G1CMBitMap::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t G1CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t G1CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}
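// A worked example of the sizing above, assuming the default 8-byte minimum
// object alignment: each bitmap bit then covers mark_distance() = 8 * 8 = 64
// heap bytes, so a 1 GB heap needs a bitmap of 1 GB / 64 = 16 MB, rounded up
// to the platform allocation alignment by compute_size().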
void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _covered = heap;

  _bm = BitMapView((BitMap::bm_word_t*) storage->reserved().start(), _covered.word_size() >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clear_range(mr);
}

void G1CMBitMap::clear_range(MemRegion mr) {
  MemRegion intersection = mr.intersection(_covered);
  assert(!intersection.is_empty(),
         "Given range from " PTR_FORMAT " to " PTR_FORMAT " is completely outside the heap",
         p2i(mr.start()), p2i(mr.end()));
  // convert address range into offset range
  _bm.at_put_range(addr_to_offset(intersection.start()),
                   addr_to_offset(intersection.end()), false);
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::allocate_or_null(new_capacity);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();

  return true;
}

size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}
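// Note on the alignment above: capacities are expressed in task queue
// entries. Aligning a requested capacity to
// lcm(vm_allocation_granularity, sizeof(TaskQueueEntryChunk)) entries worth
// of memory makes the backing array size a whole multiple of both the chunk
// size and the OS allocation granularity used by the mmap-based allocator.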
bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}

void G1CMMarkStack::expand() {
  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                    old_capacity, new_capacity);
  }
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
  }
}

void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
  elem->next = *list;
  *list = elem;
}

void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
  TaskQueueEntryChunk* result = *list;
  if (result != NULL) {
    *list = (*list)->next;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != NULL) {
    _chunks_in_chunk_list--;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
  // wraparound of _hwm.
  if (_hwm >= _chunk_capacity) {
    return NULL;
  }

  size_t cur_idx = Atomic::add(1, &_hwm) - 1;
  if (cur_idx >= _chunk_capacity) {
    return NULL;
  }

  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
  result->next = NULL;
  return result;
}

bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == NULL) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}

G1CMRootRegions::G1CMRootRegions() :
  _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}
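// Root regions are handed out to scanning threads by atomically incrementing
// _claimed_survivor_index in claim_next() below; an index at or beyond the
// survivor array length means every root region has already been claimed.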
void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_list("Cleanup List"),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _global_mark_stack(),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / os::initial_active_processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }
  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void G1ConcurrentMark::reset_marking_state() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();
  }

  clear_has_overflown();
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void G1ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

G1ConcurrentMark::~G1ConcurrentMark() {
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
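// The task below clears a mark bitmap in parallel. Each worker claims heap
// regions and clears their bitmap coverage in pieces of
// G1ClearBitMapTask::chunk_size(), so that when the clearing runs
// concurrently it can yield to safepoints between chunks instead of holding
// up the VM for a whole region.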
class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
    }

    virtual bool doHeapRegion(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
  }

  bool is_complete() {
    return _cl.complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");

  clear_bitmap(_nextMarkBitMap, _parallel_workers, true);

  // Clear the live count data. If the marking has been aborted, the abort()
  // call already did that.
  if (!has_aborted()) {
    clear_live_data(_parallel_workers);
    DEBUG_ONLY(verify_live_data_clear());
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
  clear_bitmap(_prevMarkBitMap, workers, false);
}
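// Conceptually, G1 keeps two marking bitmaps: the "prev" bitmap holds the
// result of the last completed marking cycle and is used to determine object
// liveness, while the "next" bitmap is the one being built by the current
// cycle. At the end of a cycle the two are swapped (see swapMarkBitMaps()
// in cleanup() below), and the new "next" bitmap is cleared for reuse.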
class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
    // that the result of the get_next_marked_addr() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void G1ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */
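// An illustration of the deadlock described above, with two marking threads
// and no STS leave around the barrier:
//   Thread A: enters the barrier sync and blocks, waiting for thread B.
//   Thread B: responds to a yield request and suspends, waiting for all
//             STS members (including A) to suspend as well.
// Neither thread can make progress. Leaving the STS before the barrier
// lets thread B's yield complete while thread A waits.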
void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class G1CMConcurrentMarkingTask: public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      G1CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->do_yield_check();

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
                            ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~G1CMConcurrentMarkingTask() { }
};
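// A note on the pacing in work() above: after a marking step that stopped
// because its time budget expired, a worker sleeps for
// elapsed_vtime_sec * sleep_factor(). The sleep factor is derived in the
// constructor as (1 - overhead) / overhead, so, for example, a target
// marking overhead of 50% gives a sleep factor of 1.0, i.e. a worker sleeps
// for as long as it just spent marking. When G1MarkingOverheadPercent is not
// set (its default is 0), the sleep factor is 0.0 and workers do not sleep.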
// Calculates the number of active workers for a concurrent
// phase.
uint G1ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
                                                      1, /* Minimum workers */
                                                      parallel_marking_threads(),
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
         "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
         max_parallel_marking_threads(), n_conc_workers);
  return n_conc_workers;
}

void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;

public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr);
      hr = root_regions->claim_next();
    }
  }
};
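// Root region scanning marks everything reachable from the survivor regions
// of the initial-mark pause. It runs concurrently with the mutator, but it
// must be finished before the next evacuation pause can start (see
// wait_until_scan_finished() above), which is why scan_root_regions() below
// asserts that it is never aborted part-way through.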
void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
                                     // We distribute work on a per-region basis, so starting
                                     // more threads than that is useless.
                                     root_regions()->num_root_regions());
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
    _parallel_workers->run_task(&task, _parallel_marking_threads);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _parallel_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->run_task(&markingTask);
  print_stats();
}
void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Remark Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  _gc_tracer_cm->report_object_count_after_gc(&is_alive);
}

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  uint old_regions_removed() const { return _old_regions_removed; }
  uint humongous_regions_removed() const { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};
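// Each G1ParNoteEndTask worker below runs the closure above over the regions
// it claims, collecting completely dead regions (used() > 0 but no live
// bytes) into a worker-local free list. The local lists and statistics are
// then merged into the global cleanup list and counters under
// ParGCRareEvent_lock, keeping lock traffic low.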
List"); 1198 HRRSCleanupTask hrrs_cleanup_task; 1199 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1200 &hrrs_cleanup_task); 1201 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1202 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1203 1204 // Now update the lists 1205 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1206 { 1207 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1208 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1209 1210 // If we iterate over the global cleanup list at the end of 1211 // cleanup to do this printing we will not guarantee to only 1212 // generate output for the newly-reclaimed regions (the list 1213 // might not be empty at the beginning of cleanup; we might 1214 // still be working on its previous contents). So we do the 1215 // printing here, before we append the new regions to the global 1216 // cleanup list. 1217 1218 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1219 if (hr_printer->is_active()) { 1220 FreeRegionListIterator iter(&local_cleanup_list); 1221 while (iter.more_available()) { 1222 HeapRegion* hr = iter.get_next(); 1223 hr_printer->cleanup(hr); 1224 } 1225 } 1226 1227 _cleanup_list->add_ordered(&local_cleanup_list); 1228 assert(local_cleanup_list.is_empty(), "post-condition"); 1229 1230 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1231 } 1232 } 1233 }; 1234 1235 void G1ConcurrentMark::cleanup() { 1236 // world is stopped at this checkpoint 1237 assert(SafepointSynchronize::is_at_safepoint(), 1238 "world should be stopped"); 1239 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1240 1241 // If a full collection has happened, we shouldn't do this. 1242 if (has_aborted()) { 1243 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1244 return; 1245 } 1246 1247 g1h->verifier()->verify_region_sets_optional(); 1248 1249 if (VerifyDuringGC) { 1250 HandleMark hm; // handle scope 1251 g1h->prepare_for_verify(); 1252 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1253 } 1254 g1h->verifier()->check_bitmaps("Cleanup Start"); 1255 1256 G1Policy* g1p = g1h->g1_policy(); 1257 g1p->record_concurrent_mark_cleanup_start(); 1258 1259 double start = os::elapsedTime(); 1260 1261 HeapRegionRemSet::reset_for_cleanup_tasks(); 1262 1263 { 1264 GCTraceTime(Debug, gc)("Finalize Live Data"); 1265 finalize_live_data(); 1266 } 1267 1268 if (VerifyDuringGC) { 1269 GCTraceTime(Debug, gc)("Verify Live Data"); 1270 verify_live_data(); 1271 } 1272 1273 g1h->collector_state()->set_mark_in_progress(false); 1274 1275 double count_end = os::elapsedTime(); 1276 double this_final_counting_time = (count_end - start); 1277 _total_counting_time += this_final_counting_time; 1278 1279 if (log_is_enabled(Trace, gc, liveness)) { 1280 G1PrintRegionLivenessInfoClosure cl("Post-Marking"); 1281 _g1h->heap_region_iterate(&cl); 1282 } 1283 1284 // Install newly created mark bitMap as "prev". 1285 swapMarkBitMaps(); 1286 1287 g1h->reset_gc_time_stamp(); 1288 1289 uint n_workers = _g1h->workers()->active_workers(); 1290 1291 // Note end of marking in all heap regions. 1292 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); 1293 g1h->workers()->run_task(&g1_par_note_end_task); 1294 g1h->check_gc_time_stamps(); 1295 1296 if (!cleanup_list_is_empty()) { 1297 // The cleanup list is not empty, so we'll have to process it 1298 // concurrently. 
void G1ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  g1h->verifier()->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Cleanup Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  {
    GCTraceTime(Debug, gc)("Finalize Live Data");
    finalize_live_data();
  }

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc)("Verify Live Data");
    verify_live_data();
  }

  g1h->collector_state()->set_mark_in_progress(false);

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  uint n_workers = _g1h->workers()->active_workers();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
  g1h->workers()->run_task(&g1_par_note_end_task);
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    g1h->scrub_rem_set();
    _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end();

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
  }

  g1h->verifier()->check_bitmaps("Cleanup End");

  g1h->verifier()->verify_region_sets_optional();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();
  g1h->allocation_context_stats().update_after_mark();
}
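// complete_cleanup() below runs on the concurrent mark thread after the
// cleanup pause. It drains the cleanup list built above, clearing each
// region and handing regions back to the secondary free list in batches of
// G1SecondaryFreeListAppendLength, so allocating threads can start using
// them before the whole list has been processed.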
void G1ConcurrentMark::complete_cleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                  "cleanup list has %u entries",
                                  _cleanup_list.length());

  // No one else should be accessing the _cleanup_list at this point,
  // so it is not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                      "appending %u entries to the secondary_free_list, "
                                      "cleanup list still has %u entries",
                                      tmp_free_list.length(),
                                      _cleanup_list.length());

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }
#ifndef PRODUCT
      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
#endif
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}
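// Broadly: an object is treated as live here if it lies outside the
// G1-reserved heap (such objects are not subject to this marking at all),
// or if is_obj_ill() reports it is not "ill", i.e. it was either marked on
// the next marking bitmap or allocated since this marking cycle began.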
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a worker thread (for serial reference
// processing the G1CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
 public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _is_serial(is_serial) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false      /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure: public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial;
 public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true         /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
1536 virtual void execute(ProcessTask& task); 1537 virtual void execute(EnqueueTask& task); 1538 }; 1539 1540 class G1CMRefProcTaskProxy: public AbstractGangTask { 1541 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1542 ProcessTask& _proc_task; 1543 G1CollectedHeap* _g1h; 1544 G1ConcurrentMark* _cm; 1545 1546 public: 1547 G1CMRefProcTaskProxy(ProcessTask& proc_task, 1548 G1CollectedHeap* g1h, 1549 G1ConcurrentMark* cm) : 1550 AbstractGangTask("Process reference objects in parallel"), 1551 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 1552 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1553 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 1554 } 1555 1556 virtual void work(uint worker_id) { 1557 ResourceMark rm; 1558 HandleMark hm; 1559 G1CMTask* task = _cm->task(worker_id); 1560 G1CMIsAliveClosure g1_is_alive(_g1h); 1561 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 1562 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 1563 1564 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1565 } 1566 }; 1567 1568 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 1569 assert(_workers != NULL, "Need parallel worker threads."); 1570 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1571 1572 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 1573 1574 // We need to reset the concurrency level before each 1575 // proxy task execution, so that the termination protocol 1576 // and overflow handling in G1CMTask::do_marking_step() know 1577 // how many workers to wait for. 1578 _cm->set_concurrency(_active_workers); 1579 _workers->run_task(&proc_task_proxy); 1580 } 1581 1582 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 1583 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1584 EnqueueTask& _enq_task; 1585 1586 public: 1587 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 1588 AbstractGangTask("Enqueue reference objects in parallel"), 1589 _enq_task(enq_task) { } 1590 1591 virtual void work(uint worker_id) { 1592 _enq_task.work(worker_id); 1593 } 1594 }; 1595 1596 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 1597 assert(_workers != NULL, "Need parallel worker threads."); 1598 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1599 1600 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 1601 1602 // Not strictly necessary but... 1603 // 1604 // We need to reset the concurrency level before each 1605 // proxy task execution, so that the termination protocol 1606 // and overflow handling in G1CMTask::do_marking_step() know 1607 // how many workers to wait for. 1608 _cm->set_concurrency(_active_workers); 1609 _workers->run_task(&enq_task_proxy); 1610 } 1611 1612 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 1613 if (has_overflown()) { 1614 // Skip processing the discovered references if we have 1615 // overflown the global marking stack. Reference objects 1616 // only get discovered once so it is OK not to 1617 // de-populate the discovered reference lists. We could have done so, 1618 // but the only benefit would be that, when marking restarts, 1619 // fewer reference objects are discovered. 1620 return; 1621 } 1622 1623 ResourceMark rm; 1624 HandleMark hm; 1625 1626 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1627 1628 // Is alive closure.
1629 G1CMIsAliveClosure g1_is_alive(g1h); 1630 1631 // Inner scope to exclude the cleaning of the string and symbol 1632 // tables from the displayed time. 1633 { 1634 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm); 1635 1636 ReferenceProcessor* rp = g1h->ref_processor_cm(); 1637 1638 // See the comment in G1CollectedHeap::ref_processing_init() 1639 // about how reference processing currently works in G1. 1640 1641 // Set the soft reference policy 1642 rp->setup_policy(clear_all_soft_refs); 1643 assert(_global_mark_stack.is_empty(), "mark stack should be empty"); 1644 1645 // Instances of the 'Keep Alive' and 'Complete GC' closures used 1646 // in serial reference processing. Note these closures are also 1647 // used for serially processing (by the current thread) the 1648 // JNI references during parallel reference processing. 1649 // 1650 // These closures do not need to synchronize with the worker 1651 // threads involved in parallel reference processing as these 1652 // instances are executed serially by the current thread (i.e. 1653 // reference processing is not multi-threaded and is thus 1654 // performed by the current thread instead of a gang worker). 1655 // 1656 // The gang tasks involved in parallel reference processing create 1657 // their own instances of these closures, which do their own 1658 // synchronization among themselves. 1659 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 1660 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 1661 1662 // We need at least one active thread. If reference processing 1663 // is not multi-threaded we use the current (VMThread) thread, 1664 // otherwise we use the work gang from the G1CollectedHeap and 1665 // we utilize all the worker threads we can. 1666 bool processing_is_mt = rp->processing_is_mt(); 1667 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 1668 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 1669 1670 // Parallel processing task executor. 1671 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 1672 g1h->workers(), active_workers); 1673 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 1674 1675 // Set the concurrency level. The phase was already set prior to 1676 // executing the remark task. 1677 set_concurrency(active_workers); 1678 1679 // Set the degree of MT processing here. If the discovery was done MT, 1680 // the number of threads involved during discovery could differ from 1681 // the number of active workers. This is OK as long as the discovered 1682 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 1683 rp->set_active_mt_degree(active_workers); 1684 1685 // Process the weak references. 1686 const ReferenceProcessorStats& stats = 1687 rp->process_discovered_references(&g1_is_alive, 1688 &g1_keep_alive, 1689 &g1_drain_mark_stack, 1690 executor, 1691 _gc_timer_cm); 1692 _gc_tracer_cm->report_gc_reference_stats(stats); 1693 1694 // The do_oop work routines of the keep_alive and drain_marking_stack 1695 // oop closures will set the has_overflown flag if we overflow the 1696 // global marking stack.
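// Editor's sketch (illustrative, not VM code): conceptually, for each list of
// discovered references, process_discovered_references() above drives the
// three closures roughly like this:
//
//   for (each discovered Reference r) {
//     if (is_alive->do_object_b(r->referent())) {
//       keep_alive->do_oop(r->referent_addr());   // referent live: mark/trace it
//     } else {
//       ...clear the referent or keep r for enqueueing...
//     }
//   }
//   complete_gc->do_void();                       // drain the marking stacks
//
// The per-kind policy (soft/weak/final/phantom) lives in ReferenceProcessor;
// the loop shape here is only an approximation for orientation.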
1697 1698 assert(has_overflown() || _global_mark_stack.is_empty(), 1699 "Mark stack should be empty (unless it has overflown)"); 1700 1701 assert(rp->num_q() == active_workers, "why not"); 1702 1703 rp->enqueue_discovered_references(executor); 1704 1705 rp->verify_no_references_recorded(); 1706 assert(!rp->discovery_enabled(), "Post condition"); 1707 } 1708 1709 if (has_overflown()) { 1710 // We can not trust g1_is_alive if the marking stack overflowed 1711 return; 1712 } 1713 1714 assert(_global_mark_stack.is_empty(), "Marking should have completed"); 1715 1716 // Unload Klasses, String, Symbols, Code Cache, etc. 1717 if (ClassUnloadingWithConcurrentMark) { 1718 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm); 1719 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */); 1720 g1h->complete_cleaning(&g1_is_alive, purged_classes); 1721 } else { 1722 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm); 1723 // No need to clean string table and symbol table as they are treated as strong roots when 1724 // class unloading is disabled. 1725 g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled()); 1726 1727 } 1728 } 1729 1730 void G1ConcurrentMark::swapMarkBitMaps() { 1731 G1CMBitMap* temp = _prevMarkBitMap; 1732 _prevMarkBitMap = _nextMarkBitMap; 1733 _nextMarkBitMap = temp; 1734 } 1735 1736 // Closure for marking entries in SATB buffers. 1737 class G1CMSATBBufferClosure : public SATBBufferClosure { 1738 private: 1739 G1CMTask* _task; 1740 G1CollectedHeap* _g1h; 1741 1742 // This is very similar to G1CMTask::deal_with_reference, but with 1743 // more relaxed requirements for the argument, so this must be more 1744 // circumspect about treating the argument as an object. 1745 void do_entry(void* entry) const { 1746 _task->increment_refs_reached(); 1747 HeapRegion* hr = _g1h->heap_region_containing(entry); 1748 if (entry < hr->next_top_at_mark_start()) { 1749 // Until we get here, we don't know whether entry refers to a valid 1750 // object; it could instead have been a stale reference. 
1751 oop obj = static_cast<oop>(entry); 1752 assert(obj->is_oop(true /* ignore mark word */), 1753 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)); 1754 _task->make_reference_grey(obj); 1755 } 1756 } 1757 1758 public: 1759 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h) 1760 : _task(task), _g1h(g1h) { } 1761 1762 virtual void do_buffer(void** buffer, size_t size) { 1763 for (size_t i = 0; i < size; ++i) { 1764 do_entry(buffer[i]); 1765 } 1766 } 1767 }; 1768 1769 class G1RemarkThreadsClosure : public ThreadClosure { 1770 G1CMSATBBufferClosure _cm_satb_cl; 1771 G1CMOopClosure _cm_cl; 1772 MarkingCodeBlobClosure _code_cl; 1773 int _thread_parity; 1774 1775 public: 1776 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) : 1777 _cm_satb_cl(task, g1h), 1778 _cm_cl(g1h, g1h->concurrent_mark(), task), 1779 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 1780 _thread_parity(Threads::thread_claim_parity()) {} 1781 1782 void do_thread(Thread* thread) { 1783 if (thread->is_Java_thread()) { 1784 if (thread->claim_oops_do(true, _thread_parity)) { 1785 JavaThread* jt = (JavaThread*)thread; 1786 1787 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking; 1788 // however, oops reachable from nmethods have very complex lifecycles: 1789 // * Alive if on the stack of an executing method 1790 // * Weakly reachable otherwise 1791 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 1792 // live by the SATB invariant, but other oops recorded in nmethods may behave differently. 1793 jt->nmethods_do(&_code_cl); 1794 1795 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 1796 } 1797 } else if (thread->is_VM_thread()) { 1798 if (thread->claim_oops_do(true, _thread_parity)) { 1799 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 1800 } 1801 } 1802 } 1803 }; 1804 1805 class G1CMRemarkTask: public AbstractGangTask { 1806 private: 1807 G1ConcurrentMark* _cm; 1808 public: 1809 void work(uint worker_id) { 1810 // Since all available tasks are actually started, we should 1811 // only proceed if we're supposed to be active. 1812 if (worker_id < _cm->active_tasks()) { 1813 G1CMTask* task = _cm->task(worker_id); 1814 task->record_start_time(); 1815 { 1816 ResourceMark rm; 1817 HandleMark hm; 1818 1819 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 1820 Threads::threads_do(&threads_f); 1821 } 1822 1823 do { 1824 task->do_marking_step(1000000000.0 /* something very large */, 1825 true /* do_termination */, 1826 false /* is_serial */); 1827 } while (task->has_aborted() && !_cm->has_overflown()); 1828 // If we overflow, then we do not want to restart. We instead 1829 // want to abort remark and do concurrent marking again.
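// Editor's note (descriptive, not VM code): the do/while above is the
// standard calling idiom for G1CMTask::do_marking_step() seen throughout
// this file:
//
//   do {
//     task->do_marking_step(target_ms, do_termination, is_serial);
//   } while (task->has_aborted() && !cm->has_overflown());
//
// has_aborted() means "the step stopped early; call it again to resume",
// while has_overflown() means "a global restart is required, so stop
// retrying here".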
1830 task->record_end_time(); 1831 } 1832 } 1833 1834 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) : 1835 AbstractGangTask("Par Remark"), _cm(cm) { 1836 _cm->terminator()->reset_for_reuse(active_workers); 1837 } 1838 }; 1839 1840 void G1ConcurrentMark::checkpointRootsFinalWork() { 1841 ResourceMark rm; 1842 HandleMark hm; 1843 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1844 1845 GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm); 1846 1847 g1h->ensure_parsability(false); 1848 1849 // this is remark, so we'll use up all active threads 1850 uint active_workers = g1h->workers()->active_workers(); 1851 set_concurrency_and_phase(active_workers, false /* concurrent */); 1852 // Leave _parallel_marking_threads at its 1853 // value originally calculated in the G1ConcurrentMark 1854 // constructor and pass values of the active workers 1855 // through the gang in the task. 1856 1857 { 1858 StrongRootsScope srs(active_workers); 1859 1860 G1CMRemarkTask remarkTask(this, active_workers); 1861 // We will start all available threads, even if we decide that the 1862 // active_workers will be fewer. The extra ones will just bail out 1863 // immediately. 1864 g1h->workers()->run_task(&remarkTask); 1865 } 1866 1867 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1868 guarantee(has_overflown() || 1869 satb_mq_set.completed_buffers_num() == 0, 1870 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT, 1871 BOOL_TO_STR(has_overflown()), 1872 satb_mq_set.completed_buffers_num()); 1873 1874 print_stats(); 1875 } 1876 1877 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 1878 _prevMarkBitMap->clear_range(mr); 1879 } 1880 1881 HeapRegion* 1882 G1ConcurrentMark::claim_region(uint worker_id) { 1883 // "checkpoint" the finger 1884 HeapWord* finger = _finger; 1885 1886 // _heap_end will not change underneath our feet; it only changes at 1887 // yield points. 1888 while (finger < _heap_end) { 1889 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 1890 1891 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 1892 // Make sure that the reads below do not float before loading curr_region. 1893 OrderAccess::loadload(); 1894 // Above heap_region_containing may return NULL as we always scan and claim 1895 // until the end of the heap. In this case, just jump to the next region. 1896 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 1897 1898 // Is the gap between reading the finger and doing the CAS too long? 1899 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 1900 if (res == finger && curr_region != NULL) { 1901 // we succeeded 1902 HeapWord* bottom = curr_region->bottom(); 1903 HeapWord* limit = curr_region->next_top_at_mark_start(); 1904 1905 // notice that _finger == end cannot be guaranteed here since 1906 // someone else might have moved the finger even further 1907 assert(_finger >= end, "the finger should have moved forward"); 1908 1909 if (limit > bottom) { 1910 return curr_region; 1911 } else { 1912 assert(limit == bottom, 1913 "the region limit should be at bottom"); 1914 // we return NULL and the caller should try calling 1915 // claim_region() again.
1916 return NULL; 1917 } 1918 } else { 1919 assert(_finger > finger, "the finger should have moved forward"); 1920 // read it again 1921 finger = _finger; 1922 } 1923 } 1924 1925 return NULL; 1926 } 1927 1928 #ifndef PRODUCT 1929 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 1930 private: 1931 G1CollectedHeap* _g1h; 1932 const char* _phase; 1933 int _info; 1934 1935 public: 1936 VerifyNoCSetOops(const char* phase, int info = -1) : 1937 _g1h(G1CollectedHeap::heap()), 1938 _phase(phase), 1939 _info(info) 1940 { } 1941 1942 void operator()(G1TaskQueueEntry task_entry) const { 1943 if (task_entry.is_array_slice()) { 1944 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1945 return; 1946 } 1947 guarantee(task_entry.obj()->is_oop(), 1948 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1949 p2i(task_entry.obj()), _phase, _info); 1950 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1951 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1952 p2i(task_entry.obj()), _phase, _info); 1953 } 1954 }; 1955 1956 void G1ConcurrentMark::verify_no_cset_oops() { 1957 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1958 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 1959 return; 1960 } 1961 1962 // Verify entries on the global mark stack 1963 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 1964 1965 // Verify entries on the task queues 1966 for (uint i = 0; i < _max_worker_id; ++i) { 1967 G1CMTaskQueue* queue = _task_queues->queue(i); 1968 queue->iterate(VerifyNoCSetOops("Queue", i)); 1969 } 1970 1971 // Verify the global finger 1972 HeapWord* global_finger = finger(); 1973 if (global_finger != NULL && global_finger < _heap_end) { 1974 // Since we always iterate over all regions, we might get a NULL HeapRegion 1975 // here. 1976 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 1977 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 1978 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 1979 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 1980 } 1981 1982 // Verify the task fingers 1983 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 1984 for (uint i = 0; i < parallel_marking_threads(); ++i) { 1985 G1CMTask* task = _tasks[i]; 1986 HeapWord* task_finger = task->finger(); 1987 if (task_finger != NULL && task_finger < _heap_end) { 1988 // See above note on the global finger verification. 
1989 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 1990 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 1991 !task_hr->in_collection_set(), 1992 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 1993 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 1994 } 1995 } 1996 } 1997 #endif // PRODUCT 1998 void G1ConcurrentMark::create_live_data() { 1999 _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap); 2000 } 2001 2002 void G1ConcurrentMark::finalize_live_data() { 2003 _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap); 2004 } 2005 2006 void G1ConcurrentMark::verify_live_data() { 2007 _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap); 2008 } 2009 2010 void G1ConcurrentMark::clear_live_data(WorkGang* workers) { 2011 _g1h->g1_rem_set()->clear_card_live_data(workers); 2012 } 2013 2014 #ifdef ASSERT 2015 void G1ConcurrentMark::verify_live_data_clear() { 2016 _g1h->g1_rem_set()->verify_card_live_data_is_clear(); 2017 } 2018 #endif 2019 2020 void G1ConcurrentMark::print_stats() { 2021 if (!log_is_enabled(Debug, gc, stats)) { 2022 return; 2023 } 2024 log_debug(gc, stats)("---------------------------------------------------------------------"); 2025 for (size_t i = 0; i < _active_tasks; ++i) { 2026 _tasks[i]->print_stats(); 2027 log_debug(gc, stats)("---------------------------------------------------------------------"); 2028 } 2029 } 2030 2031 void G1ConcurrentMark::abort() { 2032 if (!cmThread()->during_cycle() || _has_aborted) { 2033 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2034 return; 2035 } 2036 2037 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2038 // concurrent bitmap clearing. 2039 { 2040 GCTraceTime(Debug, gc)("Clear Next Bitmap"); 2041 clear_bitmap(_nextMarkBitMap, _g1h->workers(), false); 2042 } 2043 // Note we cannot clear the previous marking bitmap here 2044 // since VerifyDuringGC verifies the objects marked during 2045 // a full GC against the previous bitmap. 2046 2047 { 2048 GCTraceTime(Debug, gc)("Clear Live Data"); 2049 clear_live_data(_g1h->workers()); 2050 } 2051 DEBUG_ONLY({ 2052 GCTraceTime(Debug, gc)("Verify Live Data Clear"); 2053 verify_live_data_clear(); 2054 }) 2055 // Empty mark stack 2056 reset_marking_state(); 2057 for (uint i = 0; i < _max_worker_id; ++i) { 2058 _tasks[i]->clear_region_fields(); 2059 } 2060 _first_overflow_barrier_sync.abort(); 2061 _second_overflow_barrier_sync.abort(); 2062 _has_aborted = true; 2063 2064 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2065 satb_mq_set.abandon_partial_marking(); 2066 // This can be called either during or outside marking, we'll read 2067 // the expected_active value from the SATB queue set. 2068 satb_mq_set.set_active_all_threads( 2069 false, /* new active value */ 2070 satb_mq_set.is_active() /* expected_active */); 2071 } 2072 2073 static void print_ms_time_info(const char* prefix, const char* name, 2074 NumberSeq& ns) { 2075 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2076 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2077 if (ns.num() > 0) { 2078 log_trace(gc, marking)("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 2079 prefix, ns.sd(), ns.maximum()); 2080 } 2081 } 2082 2083 void G1ConcurrentMark::print_summary_info() { 2084 Log(gc, marking) log; 2085 if (!log.is_trace()) { 2086 return; 2087 } 2088 2089 log.trace(" Concurrent marking:"); 2090 print_ms_time_info(" ", "init marks", _init_times); 2091 print_ms_time_info(" ", "remarks", _remark_times); 2092 { 2093 print_ms_time_info(" ", "final marks", _remark_mark_times); 2094 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2095 2096 } 2097 print_ms_time_info(" ", "cleanups", _cleanup_times); 2098 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2099 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2100 if (G1ScrubRemSets) { 2101 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2102 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2103 } 2104 log.trace(" Total stop_world time = %8.2f s.", 2105 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2106 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2107 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2108 } 2109 2110 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2111 _parallel_workers->print_worker_threads_on(st); 2112 } 2113 2114 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2115 _parallel_workers->threads_do(tc); 2116 } 2117 2118 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2119 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2120 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2121 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2122 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2123 } 2124 2125 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) { 2126 assert(addr < _cm->finger(), "invariant"); 2127 assert(addr >= _task->finger(), "invariant"); 2128 2129 // We move that task's local finger along. 2130 _task->move_finger_to(addr); 2131 2132 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); 2133 // we only partially drain the local queue and global stack 2134 _task->drain_local_queue(true); 2135 _task->drain_global_stack(true); 2136 2137 // if the has_aborted flag has been raised, we need to bail out of 2138 // the iteration 2139 return !_task->has_aborted(); 2140 } 2141 2142 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2143 ReferenceProcessor* result = g1h->ref_processor_cm(); 2144 assert(result != NULL, "CM reference processor should not be NULL"); 2145 return result; 2146 } 2147 2148 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2149 G1ConcurrentMark* cm, 2150 G1CMTask* task) 2151 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2152 _g1h(g1h), _cm(cm), _task(task) 2153 { } 2154 2155 void G1CMTask::setup_for_region(HeapRegion* hr) { 2156 assert(hr != NULL, 2157 "claim_region() should have filtered out NULL regions"); 2158 _curr_region = hr; 2159 _finger = hr->bottom(); 2160 update_region_limit(); 2161 } 2162 2163 void G1CMTask::update_region_limit() { 2164 HeapRegion* hr = _curr_region; 2165 HeapWord* bottom = hr->bottom(); 2166 HeapWord* limit = hr->next_top_at_mark_start(); 2167 2168 if (limit == bottom) { 2169 // The region was collected underneath our feet. 
2170 // We set the finger to bottom to ensure that the bitmap 2171 // iteration that will follow this will not do anything. 2172 // (this is not a condition that holds when we set the region up, 2173 // as the region is not supposed to be empty in the first place) 2174 _finger = bottom; 2175 } else if (limit >= _region_limit) { 2176 assert(limit >= _finger, "peace of mind"); 2177 } else { 2178 assert(limit < _region_limit, "only way to get here"); 2179 // This can happen under some pretty unusual circumstances. An 2180 // evacuation pause empties the region underneath our feet (NTAMS 2181 // at bottom). We then do some allocation in the region (NTAMS 2182 // stays at bottom), followed by the region being used as a GC 2183 // alloc region (NTAMS will move to top() and the objects 2184 // originally below it will be grayed). All objects now marked in 2185 // the region are explicitly grayed, if below the global finger, 2186 // and in fact we do not need to scan anything else. So, we simply 2187 // set _finger to be limit to ensure that the bitmap iteration 2188 // doesn't do anything. 2189 _finger = limit; 2190 } 2191 2192 _region_limit = limit; 2193 } 2194 2195 void G1CMTask::giveup_current_region() { 2196 assert(_curr_region != NULL, "invariant"); 2197 clear_region_fields(); 2198 } 2199 2200 void G1CMTask::clear_region_fields() { 2201 // Values for these three fields that indicate that we're not 2202 // holding on to a region. 2203 _curr_region = NULL; 2204 _finger = NULL; 2205 _region_limit = NULL; 2206 } 2207 2208 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 2209 if (cm_oop_closure == NULL) { 2210 assert(_cm_oop_closure != NULL, "invariant"); 2211 } else { 2212 assert(_cm_oop_closure == NULL, "invariant"); 2213 } 2214 _cm_oop_closure = cm_oop_closure; 2215 } 2216 2217 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) { 2218 guarantee(nextMarkBitMap != NULL, "invariant"); 2219 _nextMarkBitMap = nextMarkBitMap; 2220 clear_region_fields(); 2221 2222 _calls = 0; 2223 _elapsed_time_ms = 0.0; 2224 _termination_time_ms = 0.0; 2225 _termination_start_time_ms = 0.0; 2226 } 2227 2228 bool G1CMTask::should_exit_termination() { 2229 regular_clock_call(); 2230 // This is called when we are in the termination protocol. We should 2231 // quit if, for some reason, this task wants to abort or the global 2232 // stack is not empty (this means that we can get work from it). 2233 return !_cm->mark_stack_empty() || has_aborted(); 2234 } 2235 2236 void G1CMTask::reached_limit() { 2237 assert(_words_scanned >= _words_scanned_limit || 2238 _refs_reached >= _refs_reached_limit, 2239 "shouldn't have been called otherwise"); 2240 regular_clock_call(); 2241 } 2242 2243 void G1CMTask::regular_clock_call() { 2244 if (has_aborted()) return; 2245 2246 // First, we need to recalculate the words scanned and refs reached 2247 // limits for the next clock call. 2248 recalculate_limits(); 2249 2250 // During the regular clock call we do the following: 2251 2252 // (1) If an overflow has been flagged, then we abort. 2253 if (_cm->has_overflown()) { 2254 set_has_aborted(); 2255 return; 2256 } 2257 2258 // If we are not concurrent (i.e. we're doing remark) we don't need 2259 // to check anything else. The other steps are only needed during 2260 // the concurrent marking phase. 2261 if (!concurrent()) return; 2262 2263 // (2) If marking has been aborted for Full GC, then we also abort.
2264 if (_cm->has_aborted()) { 2265 set_has_aborted(); 2266 return; 2267 } 2268 2269 double curr_time_ms = os::elapsedVTime() * 1000.0; 2270 2271 // (3) We check whether we should yield. If we have to, then we abort. 2272 if (SuspendibleThreadSet::should_yield()) { 2273 // We should yield. To do this we abort the task. The caller is 2274 // responsible for yielding. 2275 set_has_aborted(); 2276 return; 2277 } 2278 2279 // (4) We check whether we've reached our time quota. If we have, 2280 // then we abort. 2281 double elapsed_time_ms = curr_time_ms - _start_time_ms; 2282 if (elapsed_time_ms > _time_target_ms) { 2283 set_has_aborted(); 2284 _has_timed_out = true; 2285 return; 2286 } 2287 2288 // (5) Finally, we check whether there are enough completed SATB 2289 // buffers available for processing. If there are, we abort. 2290 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2291 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2292 // we do need to process SATB buffers, so we'll abort and restart 2293 // the marking task to do so 2294 set_has_aborted(); 2295 return; 2296 } 2297 } 2298 2299 void G1CMTask::recalculate_limits() { 2300 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2301 _words_scanned_limit = _real_words_scanned_limit; 2302 2303 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2304 _refs_reached_limit = _real_refs_reached_limit; 2305 } 2306 2307 void G1CMTask::decrease_limits() { 2308 // This is called when we believe that we're going to do an infrequent 2309 // operation which will increase the per-byte scanned cost (i.e. move 2310 // entries to/from the global stack). It basically tries to decrease the 2311 // scanning limit so that the clock is called earlier. 2312 2313 _words_scanned_limit = _real_words_scanned_limit - 2314 3 * words_scanned_period / 4; 2315 _refs_reached_limit = _real_refs_reached_limit - 2316 3 * refs_reached_period / 4; 2317 } 2318 2319 void G1CMTask::move_entries_to_global_stack() { 2320 // Local array where we'll store the entries that will be popped 2321 // from the local queue. 2322 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2323 2324 size_t n = 0; 2325 G1TaskQueueEntry task_entry; 2326 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) { 2327 buffer[n] = task_entry; 2328 ++n; 2329 } 2330 if (n < G1CMMarkStack::EntriesPerChunk) { 2331 buffer[n] = G1TaskQueueEntry(); 2332 } 2333 2334 if (n > 0) { 2335 if (!_cm->mark_stack_push(buffer)) { 2336 set_has_aborted(); 2337 } 2338 } 2339 2340 // This operation was quite expensive, so decrease the limits. 2341 decrease_limits(); 2342 } 2343 2344 bool G1CMTask::get_entries_from_global_stack() { 2345 // Local array where we'll store the entries that will be popped 2346 // from the global stack. 2347 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2348 2349 if (!_cm->mark_stack_pop(buffer)) { 2350 return false; 2351 } 2352 2353 // We did actually pop at least one entry. 2354 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) { 2355 G1TaskQueueEntry task_entry = buffer[i]; 2356 if (task_entry.is_null()) { 2357 break; 2358 } 2359 assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj())); 2360 bool success = _task_queue->push(task_entry); 2361 // We only call this when the local queue is empty or under a 2362 // given target limit. So, we do not expect this push to fail.
2363 assert(success, "invariant"); 2364 } 2365 2366 // This operation was quite expensive, so decrease the limits 2367 decrease_limits(); 2368 return true; 2369 } 2370 2371 void G1CMTask::drain_local_queue(bool partially) { 2372 if (has_aborted()) { 2373 return; 2374 } 2375 2376 // Decide what the target size is, depending on whether we're going to 2377 // drain it partially (so that other tasks can steal if they run out 2378 // of things to do) or totally (at the very end). 2379 size_t target_size; 2380 if (partially) { 2381 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 2382 } else { 2383 target_size = 0; 2384 } 2385 2386 if (_task_queue->size() > target_size) { 2387 G1TaskQueueEntry entry; 2388 bool ret = _task_queue->pop_local(entry); 2389 while (ret) { 2390 scan_task_entry(entry); 2391 if (_task_queue->size() <= target_size || has_aborted()) { 2392 ret = false; 2393 } else { 2394 ret = _task_queue->pop_local(entry); 2395 } 2396 } 2397 } 2398 } 2399 2400 void G1CMTask::drain_global_stack(bool partially) { 2401 if (has_aborted()) return; 2402 2403 // We have a policy to drain the local queue before we attempt to 2404 // drain the global stack. 2405 assert(partially || _task_queue->size() == 0, "invariant"); 2406 2407 // Decide what the target size is, depending on whether we're going to 2408 // drain it partially (so that other tasks can steal if they run out 2409 // of things to do) or totally (at the very end). 2410 // Notice that when draining the global mark stack partially, due to the raciness 2411 // of the mark stack size update we might in fact drop below the target. But 2412 // this is not a problem. 2413 // In case of total draining, we simply process until the global mark stack is 2414 // totally empty, disregarding the size counter. 2415 if (partially) { 2416 size_t const target_size = _cm->partial_mark_stack_size_target(); 2417 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 2418 if (get_entries_from_global_stack()) { 2419 drain_local_queue(partially); 2420 } 2421 } 2422 } else { 2423 while (!has_aborted() && get_entries_from_global_stack()) { 2424 drain_local_queue(partially); 2425 } 2426 } 2427 } 2428 2429 // The SATB queue code makes several assumptions about whether to call the par or 2430 // non-par versions of the methods. This is why some of the code is 2431 // replicated. We should really get rid of the single-threaded version 2432 // of the code to simplify things. 2433 void G1CMTask::drain_satb_buffers() { 2434 if (has_aborted()) return; 2435 2436 // We set this so that the regular clock knows that we're in the 2437 // middle of draining buffers and doesn't set the abort flag when it 2438 // notices that SATB buffers are available for draining. It'd be 2439 // very counterproductive if it did that. :-) 2440 _draining_satb_buffers = true; 2441 2442 G1CMSATBBufferClosure satb_cl(this, _g1h); 2443 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2444 2445 // This keeps claiming and applying the closure to completed buffers 2446 // until we run out of buffers or we need to abort.
2447 while (!has_aborted() && 2448 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2449 regular_clock_call(); 2450 } 2451 2452 _draining_satb_buffers = false; 2453 2454 assert(has_aborted() || 2455 concurrent() || 2456 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2457 2458 // again, this was a potentially expensive operation, decrease the 2459 // limits to get the regular clock call early 2460 decrease_limits(); 2461 } 2462 2463 void G1CMTask::print_stats() { 2464 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 2465 _worker_id, _calls); 2466 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2467 _elapsed_time_ms, _termination_time_ms); 2468 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 2469 _step_times_ms.num(), _step_times_ms.avg(), 2470 _step_times_ms.sd()); 2471 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 2472 _step_times_ms.maximum(), _step_times_ms.sum()); 2473 } 2474 2475 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2476 return _task_queues->steal(worker_id, hash_seed, task_entry); 2477 } 2478 2479 /***************************************************************************** 2480 2481 The do_marking_step(time_target_ms, ...) method is the building 2482 block of the parallel marking framework. It can be called in parallel 2483 with other invocations of do_marking_step() on different tasks 2484 (but only one per task, obviously) and concurrently with the 2485 mutator threads, or during remark, hence it eliminates the need 2486 for two versions of the code. When called during remark, it will 2487 pick up from where the task left off during the concurrent marking 2488 phase. Interestingly, tasks are also claimable during evacuation 2489 pauses, since do_marking_step() ensures that it aborts before 2490 it needs to yield. 2491 2492 The data structures that it uses to do marking work are the 2493 following: 2494 2495 (1) Marking Bitmap. If there are gray objects that appear only 2496 on the bitmap (this happens either when dealing with an overflow 2497 or when the initial marking phase has simply marked the roots 2498 and didn't push them on the stack), then tasks claim heap 2499 regions whose bitmap they then scan to find gray objects. A 2500 global finger indicates where the end of the last claimed region 2501 is. A local finger indicates how far into the region a task has 2502 scanned. The two fingers are used to determine how to gray an 2503 object (i.e. whether simply marking it is OK, as it will be 2504 visited by a task in the future, or whether it also needs to be 2505 pushed on a stack). 2506 2507 (2) Local Queue. Each task has a local queue, which it accesses 2508 reasonably efficiently. Other tasks can steal from 2509 it when they run out of work. Throughout the marking phase, a 2510 task attempts to keep its local queue short but not totally 2511 empty, so that entries are available for stealing by other 2512 tasks. Only when there is no more work will a task totally 2513 drain its local queue. 2514 2515 (3) Global Mark Stack. This handles local queue overflow. During 2516 marking only sets of entries are moved between it and the local 2517 queues, as access to it requires a mutex, and more fine-grained 2518 interaction with it might cause contention. If it 2519 overflows, then the marking phase should restart and iterate 2520 over the bitmap to identify gray objects.
Throughout the marking 2521 phase, tasks attempt to keep the global mark stack at a small 2522 length but not totally empty, so that entries are available for 2523 popping by other tasks. Only when there is no more work will tasks 2524 totally drain the global mark stack. 2525 2526 (4) SATB Buffer Queue. This is where completed SATB buffers are 2527 made available. Buffers are regularly removed from this queue 2528 and scanned for roots, so that the queue doesn't get too 2529 long. During remark, all completed buffers are processed, as 2530 well as the filled-in parts of any uncompleted buffers. 2531 2532 The do_marking_step() method tries to abort when the time target 2533 has been reached. There are a few other cases when the 2534 do_marking_step() method also aborts: 2535 2536 (1) When the marking phase has been aborted (after a Full GC). 2537 2538 (2) When a global overflow (on the global stack) has been 2539 triggered. Before the task aborts, it will actually sync up with 2540 the other tasks to ensure that all the marking data structures 2541 (local queues, stacks, fingers etc.) are re-initialized so that 2542 when do_marking_step() completes, the marking phase can 2543 immediately restart. 2544 2545 (3) When enough completed SATB buffers are available. The 2546 do_marking_step() method only tries to drain SATB buffers right 2547 at the beginning. So, if enough buffers are available, the 2548 marking step aborts and the SATB buffers are processed at 2549 the beginning of the next invocation. 2550 2551 (4) To yield. When we have to yield, we abort and yield 2552 right at the end of do_marking_step(). This saves us from a lot 2553 of hassle as, by yielding, we might allow a Full GC. If this 2554 happens then objects will be compacted underneath our feet, the 2555 heap might shrink, etc. We save checking for this by just 2556 aborting and doing the yield right at the end. 2557 2558 From the above it follows that the do_marking_step() method should 2559 be called in a loop (or, otherwise, regularly) until it completes. 2560 2561 If a marking step completes without its has_aborted() flag being 2562 true, it means it has completed the current marking phase (and 2563 also all other marking tasks have done so and have all synced up). 2564 2565 A method called regular_clock_call() is invoked "regularly" (in 2566 sub-ms intervals) throughout marking. It is this clock method that 2567 checks all the abort conditions which were mentioned above and 2568 decides when the task should abort. A work-based scheme is used to 2569 trigger this clock method: when the number of object words the 2570 marking phase has scanned or the number of references the marking 2571 phase has visited reaches a given limit. Additional invocations of 2572 the clock method have been planted in a few other strategic places 2573 too. The initial reason for the clock method was to avoid calling 2574 vtime too regularly, as it is quite expensive. So, once it was in 2575 place, it was natural to piggy-back all the other conditions on it 2576 too and not constantly check them throughout the code. 2577 2578 If do_termination is true then do_marking_step will enter its 2579 termination protocol. 2580 2581 The value of is_serial must be true when do_marking_step is being 2582 called serially (i.e. by the VMThread) and do_marking_step should 2583 skip any synchronization in the termination and overflow code. 2584 Examples include the serial remark code and the serial reference 2585 processing closures.
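    As a concrete illustration of the serial case (an editor's sketch of
    the call shape, mirroring the serial 'drain' closure earlier in this
    file, not new code):

      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_termination */,
                              true /* is_serial */);
      } while (task->has_aborted() && !cm->has_overflown());

    With is_serial == true, the overflow handling at the end of
    do_marking_step() skips the two barrier syncs and only resets the
    task-local state.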
2586 2587 The value of is_serial must be false when do_marking_step is 2588 being called by any of the worker threads in a work gang. 2589 Examples include the concurrent marking code (CMMarkingTask), 2590 the MT remark code, and the MT reference processing closures. 2591 2592 *****************************************************************************/ 2593 2594 void G1CMTask::do_marking_step(double time_target_ms, 2595 bool do_termination, 2596 bool is_serial) { 2597 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2598 assert(concurrent() == _cm->concurrent(), "they should be the same"); 2599 2600 G1Policy* g1_policy = _g1h->g1_policy(); 2601 assert(_task_queues != NULL, "invariant"); 2602 assert(_task_queue != NULL, "invariant"); 2603 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant"); 2604 2605 assert(!_claimed, 2606 "only one thread should claim this task at any one time"); 2607 2608 // OK, this doesn't safeguard against all possible scenarios, as it is 2609 // possible for two threads to set the _claimed flag at the same 2610 // time. But it is only for debugging purposes anyway and it will 2611 // catch most problems. 2612 _claimed = true; 2613 2614 _start_time_ms = os::elapsedVTime() * 1000.0; 2615 2616 // If do_stealing is true then do_marking_step will attempt to 2617 // steal work from the other G1CMTasks. It only makes sense to 2618 // enable stealing when the termination protocol is enabled 2619 // and do_marking_step() is not being called serially. 2620 bool do_stealing = do_termination && !is_serial; 2621 2622 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2623 _time_target_ms = time_target_ms - diff_prediction_ms; 2624 2625 // set up the variables that are used in the work-based scheme to 2626 // call the regular clock method 2627 _words_scanned = 0; 2628 _refs_reached = 0; 2629 recalculate_limits(); 2630 2631 // clear all flags 2632 clear_has_aborted(); 2633 _has_timed_out = false; 2634 _draining_satb_buffers = false; 2635 2636 ++_calls; 2637 2638 // Set up the bitmap and oop closures. Anything that uses them is 2639 // eventually called from this method, so it is OK to allocate these 2640 // statically. 2641 G1CMBitMapClosure bitmap_closure(this, _cm); 2642 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 2643 set_cm_oop_closure(&cm_oop_closure); 2644 2645 if (_cm->has_overflown()) { 2646 // This can happen if the mark stack overflows during a GC pause 2647 // and this task, after a yield point, restarts. We have to abort 2648 // as we need to get into the overflow protocol which happens 2649 // right at the end of this task. 2650 set_has_aborted(); 2651 } 2652 2653 // First drain any available SATB buffers. After this, we will not 2654 // look at SATB buffers before the next invocation of this method. 2655 // If enough completed SATB buffers are queued up, the regular clock 2656 // will abort this task so that it restarts. 2657 drain_satb_buffers(); 2658 // ...then partially drain the local queue and the global stack 2659 drain_local_queue(true); 2660 drain_global_stack(true); 2661 2662 do { 2663 if (!has_aborted() && _curr_region != NULL) { 2664 // This means that we're already holding on to a region. 2665 assert(_finger != NULL, "if region is not NULL, then the finger " 2666 "should not be NULL either"); 2667 2668 // We might have restarted this task after an evacuation pause 2669 // which might have evacuated the region we're holding on to 2670 // underneath our feet.
Let's read its limit again to make sure 2671 // that we do not iterate over a region of the heap that 2672 // contains garbage (update_region_limit() will also move 2673 // _finger to the start of the region if it is found empty). 2674 update_region_limit(); 2675 // We will start from _finger not from the start of the region, 2676 // as we might be restarting this task after aborting half-way 2677 // through scanning this region. In this case, _finger points to 2678 // the address where we last found a marked object. If this is a 2679 // fresh region, _finger points to start(). 2680 MemRegion mr = MemRegion(_finger, _region_limit); 2681 2682 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2683 "humongous regions should go around loop once only"); 2684 2685 // Some special cases: 2686 // If the memory region is empty, we can just give up the region. 2687 // If the current region is humongous then we only need to check 2688 // the bitmap for the bit associated with the start of the object, 2689 // scan the object if it's live, and give up the region. 2690 // Otherwise, let's iterate over the bitmap of the part of the region 2691 // that is left. 2692 // If the iteration is successful, give up the region. 2693 if (mr.is_empty()) { 2694 giveup_current_region(); 2695 regular_clock_call(); 2696 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2697 if (_nextMarkBitMap->is_marked(mr.start())) { 2698 // The object is marked - apply the closure 2699 bitmap_closure.do_addr(mr.start()); 2700 } 2701 // Even if this task aborted while scanning the humongous object 2702 // we can (and should) give up the current region. 2703 giveup_current_region(); 2704 regular_clock_call(); 2705 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 2706 giveup_current_region(); 2707 regular_clock_call(); 2708 } else { 2709 assert(has_aborted(), "currently the only way to do so"); 2710 // The only way to abort the bitmap iteration is to return 2711 // false from the do_addr() method. However, inside the 2712 // do_addr() method we move the _finger to point to the 2713 // object currently being looked at. So, if we bail out, we 2714 // have definitely set _finger to something non-null. 2715 assert(_finger != NULL, "invariant"); 2716 2717 // Region iteration was actually aborted. So now _finger 2718 // points to the address of the object we last scanned. If we 2719 // leave it there, when we restart this task, we will rescan 2720 // the object. It is easy to avoid this. We move the finger by 2721 // enough to point to the next possible object header. 2722 assert(_finger < _region_limit, "invariant"); 2723 HeapWord* const new_finger = _finger + ((oop)_finger)->size(); 2724 // Check if bitmap iteration was aborted while scanning the last object 2725 if (new_finger >= _region_limit) { 2726 giveup_current_region(); 2727 } else { 2728 move_finger_to(new_finger); 2729 } 2730 } 2731 } 2732 // At this point we have either completed iterating over the 2733 // region we were holding on to, or we have aborted. 2734 2735 // We then partially drain the local queue and the global stack. 2736 // (Do we really need this?) 2737 drain_local_queue(true); 2738 drain_global_stack(true); 2739 2740 // Read the note on the claim_region() method on why it might 2741 // return NULL with potentially more regions available for 2742 // claiming and why we have to check out_of_regions() to determine 2743 // whether we're done or not.
2744 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2745 // We are going to try to claim a new region. We should have 2746 // given up on the previous one. 2747 // Separated the asserts so that we know which one fires. 2748 assert(_curr_region == NULL, "invariant"); 2749 assert(_finger == NULL, "invariant"); 2750 assert(_region_limit == NULL, "invariant"); 2751 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2752 if (claimed_region != NULL) { 2753 // Yes, we managed to claim one 2754 setup_for_region(claimed_region); 2755 assert(_curr_region == claimed_region, "invariant"); 2756 } 2757 // It is important to call the regular clock here. It might take 2758 // a while to claim a region if, for example, we hit a large 2759 // block of empty regions. So we need to call the regular clock 2760 // method once round the loop to make sure it's called 2761 // frequently enough. 2762 regular_clock_call(); 2763 } 2764 2765 if (!has_aborted() && _curr_region == NULL) { 2766 assert(_cm->out_of_regions(), 2767 "at this point we should be out of regions"); 2768 } 2769 } while (_curr_region != NULL && !has_aborted()); 2770 2771 if (!has_aborted()) { 2772 // We cannot check whether the global stack is empty, since other 2773 // tasks might be pushing objects to it concurrently. 2774 assert(_cm->out_of_regions(), 2775 "at this point we should be out of regions"); 2776 // Try to reduce the number of available SATB buffers so that 2777 // remark has less work to do. 2778 drain_satb_buffers(); 2779 } 2780 2781 // Since we've done everything else, we can now totally drain the 2782 // local queue and global stack. 2783 drain_local_queue(false); 2784 drain_global_stack(false); 2785 2786 // Attempt at work stealing from other tasks' queues. 2787 if (do_stealing && !has_aborted()) { 2788 // We have not aborted. This means that we have finished all that 2789 // we could. Let's try to do some stealing... 2790 2791 // We cannot check whether the global stack is empty, since other 2792 // tasks might be pushing objects to it concurrently. 2793 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2794 "only way to reach here"); 2795 while (!has_aborted()) { 2796 G1TaskQueueEntry entry; 2797 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2798 scan_task_entry(entry); 2799 2800 // And since we're towards the end, let's totally drain the 2801 // local queue and global stack. 2802 drain_local_queue(false); 2803 drain_global_stack(false); 2804 } else { 2805 break; 2806 } 2807 } 2808 } 2809 2810 // We still haven't aborted. Now, let's try to get into the 2811 // termination protocol. 2812 if (do_termination && !has_aborted()) { 2813 // We cannot check whether the global stack is empty, since other 2814 // tasks might be concurrently pushing objects on it. 2815 // Separated the asserts so that we know which one fires. 2816 assert(_cm->out_of_regions(), "only way to reach here"); 2817 assert(_task_queue->size() == 0, "only way to reach here"); 2818 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2819 2820 // The G1CMTask class also extends the TerminatorTerminator class, 2821 // hence its should_exit_termination() method will also decide 2822 // whether to exit the termination protocol or not.
2823 bool finished = (is_serial || 2824 _cm->terminator()->offer_termination(this)); 2825 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2826 _termination_time_ms += 2827 termination_end_time_ms - _termination_start_time_ms; 2828 2829 if (finished) { 2830 // We're all done. 2831 2832 if (_worker_id == 0) { 2833 // let's allow task 0 to do this 2834 if (concurrent()) { 2835 assert(_cm->concurrent_marking_in_progress(), "invariant"); 2836 // we need to set this to false before the next 2837 // safepoint. This way we ensure that the marking phase 2838 // doesn't observe any more heap expansions. 2839 _cm->clear_concurrent_marking_in_progress(); 2840 } 2841 } 2842 2843 // We can now guarantee that the global stack is empty, since 2844 // all other tasks have finished. We separated the guarantees so 2845 // that, if a condition is false, we can immediately find out 2846 // which one. 2847 guarantee(_cm->out_of_regions(), "only way to reach here"); 2848 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2849 guarantee(_task_queue->size() == 0, "only way to reach here"); 2850 guarantee(!_cm->has_overflown(), "only way to reach here"); 2851 } else { 2852 // Apparently there's more work to do. Let's abort this task. The caller 2853 // will restart it and we can hopefully find more things to do. 2854 set_has_aborted(); 2855 } 2856 } 2857 2858 // Mainly for debugging purposes to make sure that a pointer to the 2859 // closure which was statically allocated in this frame doesn't 2860 // escape it by accident. 2861 set_cm_oop_closure(NULL); 2862 double end_time_ms = os::elapsedVTime() * 1000.0; 2863 double elapsed_time_ms = end_time_ms - _start_time_ms; 2864 // Update the step history. 2865 _step_times_ms.add(elapsed_time_ms); 2866 2867 if (has_aborted()) { 2868 // The task was aborted for some reason. 2869 if (_has_timed_out) { 2870 double diff_ms = elapsed_time_ms - _time_target_ms; 2871 // Keep statistics of how well we did with respect to hitting 2872 // our target only if we actually timed out (if we aborted for 2873 // other reasons, then the results might get skewed). 2874 _marking_step_diffs_ms.add(diff_ms); 2875 } 2876 2877 if (_cm->has_overflown()) { 2878 // This is the interesting one. We aborted because a global 2879 // overflow was raised. This means we have to restart the 2880 // marking phase and start iterating over regions. However, in 2881 // order to do this we have to make sure that all tasks stop 2882 // what they are doing and re-initialize in a safe manner. We 2883 // will achieve this with the use of two barrier sync points. 2884 2885 if (!is_serial) { 2886 // We only need to enter the sync barrier if being called 2887 // from a parallel context 2888 _cm->enter_first_sync_barrier(_worker_id); 2889 2890 // When we exit this sync barrier we know that all tasks have 2891 // stopped doing marking work. So, it's now safe to 2892 // re-initialize our data structures. At the end of this method, 2893 // task 0 will clear the global data structures. 2894 } 2895 2896 // We clear the local state of this task... 2897 clear_region_fields(); 2898 2899 if (!is_serial) { 2900 // ...and enter the second barrier. 2901 _cm->enter_second_sync_barrier(_worker_id); 2902 } 2903 // At this point, if we're in the concurrent phase of 2904 // marking, everything has been re-initialized and we're 2905 // ready to restart.
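// Editor's sketch of the two-barrier restart protocol used above (an
// illustrative timeline, not VM code):
//
//   all workers:  _cm->enter_first_sync_barrier(worker_id);  // stop marking
//   (task 0 then clears the global data structures)
//   each worker:  clear_region_fields();                     // reset local state
//   all workers:  _cm->enter_second_sync_barrier(worker_id); // restart together
//
// The first barrier guarantees no task is still producing work while the
// global state is reset; the second keeps any task from restarting before
// the reset has completed.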
2906 } 2907 } 2908 2909 _claimed = false; 2910 } 2911 2912 G1CMTask::G1CMTask(uint worker_id, 2913 G1ConcurrentMark* cm, 2914 G1CMTaskQueue* task_queue, 2915 G1CMTaskQueueSet* task_queues) 2916 : _g1h(G1CollectedHeap::heap()), 2917 _worker_id(worker_id), _cm(cm), 2918 _objArray_processor(this), 2919 _claimed(false), 2920 _nextMarkBitMap(NULL), _hash_seed(17), 2921 _task_queue(task_queue), 2922 _task_queues(task_queues), 2923 _cm_oop_closure(NULL) { 2924 guarantee(task_queue != NULL, "invariant"); 2925 guarantee(task_queues != NULL, "invariant"); 2926 2927 _marking_step_diffs_ms.add(0.5); 2928 } 2929 2930 // These are formatting macros that are used below to ensure 2931 // consistent formatting. The *_H_* versions are used to format the 2932 // header for a particular value and they should be kept consistent 2933 // with the corresponding macro. Also note that most of the macros add 2934 // the necessary white space (as a prefix) which makes them a bit 2935 // easier to compose. 2936 2937 // All the output lines are prefixed with this string to be able to 2938 // identify them easily in a large log file. 2939 #define G1PPRL_LINE_PREFIX "###" 2940 2941 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 2942 #ifdef _LP64 2943 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 2944 #else // _LP64 2945 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 2946 #endif // _LP64 2947 2948 // For per-region info 2949 #define G1PPRL_TYPE_FORMAT " %-4s" 2950 #define G1PPRL_TYPE_H_FORMAT " %4s" 2951 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 2952 #define G1PPRL_BYTE_H_FORMAT " %9s" 2953 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 2954 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 2955 2956 // For summary info 2957 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 2958 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 2959 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 2960 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 2961 2962 G1PrintRegionLivenessInfoClosure:: 2963 G1PrintRegionLivenessInfoClosure(const char* phase_name) 2964 : _total_used_bytes(0), _total_capacity_bytes(0), 2965 _total_prev_live_bytes(0), _total_next_live_bytes(0), 2966 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 2967 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2968 MemRegion g1_reserved = g1h->g1_reserved(); 2969 double now = os::elapsedTime(); 2970 2971 // Print the header of the output. 
2972 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 2973 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 2974 G1PPRL_SUM_ADDR_FORMAT("reserved") 2975 G1PPRL_SUM_BYTE_FORMAT("region-size"), 2976 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 2977 HeapRegion::GrainBytes); 2978 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2979 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2980 G1PPRL_TYPE_H_FORMAT 2981 G1PPRL_ADDR_BASE_H_FORMAT 2982 G1PPRL_BYTE_H_FORMAT 2983 G1PPRL_BYTE_H_FORMAT 2984 G1PPRL_BYTE_H_FORMAT 2985 G1PPRL_DOUBLE_H_FORMAT 2986 G1PPRL_BYTE_H_FORMAT 2987 G1PPRL_BYTE_H_FORMAT, 2988 "type", "address-range", 2989 "used", "prev-live", "next-live", "gc-eff", 2990 "remset", "code-roots"); 2991 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2992 G1PPRL_TYPE_H_FORMAT 2993 G1PPRL_ADDR_BASE_H_FORMAT 2994 G1PPRL_BYTE_H_FORMAT 2995 G1PPRL_BYTE_H_FORMAT 2996 G1PPRL_BYTE_H_FORMAT 2997 G1PPRL_DOUBLE_H_FORMAT 2998 G1PPRL_BYTE_H_FORMAT 2999 G1PPRL_BYTE_H_FORMAT, 3000 "", "", 3001 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 3002 "(bytes)", "(bytes)"); 3003 } 3004 3005 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 3006 const char* type = r->get_type_str(); 3007 HeapWord* bottom = r->bottom(); 3008 HeapWord* end = r->end(); 3009 size_t capacity_bytes = r->capacity(); 3010 size_t used_bytes = r->used(); 3011 size_t prev_live_bytes = r->live_bytes(); 3012 size_t next_live_bytes = r->next_live_bytes(); 3013 double gc_eff = r->gc_efficiency(); 3014 size_t remset_bytes = r->rem_set()->mem_size(); 3015 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 3016 3017 _total_used_bytes += used_bytes; 3018 _total_capacity_bytes += capacity_bytes; 3019 _total_prev_live_bytes += prev_live_bytes; 3020 _total_next_live_bytes += next_live_bytes; 3021 _total_remset_bytes += remset_bytes; 3022 _total_strong_code_roots_bytes += strong_code_roots_bytes; 3023 3024 // Print a line for this particular region. 3025 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3026 G1PPRL_TYPE_FORMAT 3027 G1PPRL_ADDR_BASE_FORMAT 3028 G1PPRL_BYTE_FORMAT 3029 G1PPRL_BYTE_FORMAT 3030 G1PPRL_BYTE_FORMAT 3031 G1PPRL_DOUBLE_FORMAT 3032 G1PPRL_BYTE_FORMAT 3033 G1PPRL_BYTE_FORMAT, 3034 type, p2i(bottom), p2i(end), 3035 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 3036 remset_bytes, strong_code_roots_bytes); 3037 3038 return false; 3039 } 3040 3041 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 3042 // add static memory usages to remembered set sizes 3043 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3044 // Print the footer of the output. 3045 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3046 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3047 " SUMMARY" 3048 G1PPRL_SUM_MB_FORMAT("capacity") 3049 G1PPRL_SUM_MB_PERC_FORMAT("used") 3050 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3051 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3052 G1PPRL_SUM_MB_FORMAT("remset") 3053 G1PPRL_SUM_MB_FORMAT("code-roots"), 3054 bytes_to_mb(_total_capacity_bytes), 3055 bytes_to_mb(_total_used_bytes), 3056 perc(_total_used_bytes, _total_capacity_bytes), 3057 bytes_to_mb(_total_prev_live_bytes), 3058 perc(_total_prev_live_bytes, _total_capacity_bytes), 3059 bytes_to_mb(_total_next_live_bytes), 3060 perc(_total_next_live_bytes, _total_capacity_bytes), 3061 bytes_to_mb(_total_remset_bytes), 3062 bytes_to_mb(_total_strong_code_roots_bytes)); 3063 }
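// Editor's note: the G1PPRL_* macros above rely on adjacent string literals
// being concatenated at compile time, so each log call sees a single format
// string, and the *_H_* header widths stay in sync with the data widths. A
// minimal standalone illustration of the same technique (hypothetical macros,
// not part of this file):
//
//   #include <stdio.h>
//
//   #define ROW_PREFIX  "###"
//   #define COL_TYPE    " %-4s"   // data row: left-justified region type
//   #define COL_TYPE_H  " %4s"    // header for the same column
//   #define COL_BYTE    " %9zu"   // data row: byte count
//   #define COL_BYTE_H  " %9s"    // header for the same column
//
//   int main(void) {
//     printf(ROW_PREFIX COL_TYPE_H COL_BYTE_H "\n", "type", "used");
//     printf(ROW_PREFIX COL_TYPE   COL_BYTE   "\n", "EDEN", (size_t)4096);
//     return 0;
//   }
//
// Keeping each *_H_* macro the same width as its data macro is what keeps the
// header and data rows column-aligned, as the comment above the macros notes.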