/*
 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1CardLiveData.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
#include "utilities/growableArray.hpp"

#ifndef PRODUCT
bool G1CMBitMap::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _word_size,
         "size inconsistency");
  return _start == (HeapWord*)(heap_rs.start()) &&
         _word_size == heap_rs.word_size();
}
#endif

void G1CMBitMap::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t G1CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t G1CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _start = heap.start();
  _word_size = heap.word_size();

  _bm = BitMapView((BitMap::bm_word_t*) storage->reserved().start(), _word_size >> _shifter);

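  // Register this bitmap's listener with the storage mapper so that newly
  // committed backing memory is cleared via on_commit() below, unless the
  // mapper already hands back zero-filled pages.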
  storage->set_mapping_changed_listener(&_listener);

  assert(covers(heap), "Bitmap initialization inconsistency, does not cover expected memory area.");
}

void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clear_range(mr);
}

void G1CMBitMap::clear_range(MemRegion mr) {
  mr = mr.intersection(MemRegion(_start, _word_size));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(addr_to_offset(mr.start()),
                   addr_to_offset(mr.end()), false);
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::allocate_or_null(new_capacity);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();

  return true;
}

size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}

bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}

void G1CMMarkStack::expand() {
  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 180 old_capacity, new_capacity); 181 } 182 } 183 184 G1CMMarkStack::~G1CMMarkStack() { 185 if (_base != NULL) { 186 MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity); 187 } 188 } 189 190 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) { 191 elem->next = *list; 192 *list = elem; 193 } 194 195 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) { 196 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 197 add_chunk_to_list(&_chunk_list, elem); 198 _chunks_in_chunk_list++; 199 } 200 201 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) { 202 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 203 add_chunk_to_list(&_free_list, elem); 204 } 205 206 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) { 207 TaskQueueEntryChunk* result = *list; 208 if (result != NULL) { 209 *list = (*list)->next; 210 } 211 return result; 212 } 213 214 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() { 215 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 216 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list); 217 if (result != NULL) { 218 _chunks_in_chunk_list--; 219 } 220 return result; 221 } 222 223 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() { 224 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 225 return remove_chunk_from_list(&_free_list); 226 } 227 228 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() { 229 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code. 230 // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding 231 // wraparound of _hwm. 232 if (_hwm >= _chunk_capacity) { 233 return NULL; 234 } 235 236 size_t cur_idx = Atomic::add(1, &_hwm) - 1; 237 if (cur_idx >= _chunk_capacity) { 238 return NULL; 239 } 240 241 TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk; 242 result->next = NULL; 243 return result; 244 } 245 246 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) { 247 // Get a new chunk. 248 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list(); 249 250 if (new_chunk == NULL) { 251 // Did not get a chunk from the free list. Allocate from backing memory. 
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}

G1CMRootRegions::G1CMRootRegions() :
  _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}

void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
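  // Only check that every survivor region was claimed if the scan ran to
  // completion; an aborted scan may legitimately leave regions unclaimed.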
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_list("Cleanup List"),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _global_mark_stack(),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / os::initial_active_processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
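    // Only the combinations actually specified on the command line are
    // checked here; the ergonomic default path above already validated its
    // own calculated value.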
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void G1ConcurrentMark::reset_marking_state() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();
  }

  clear_has_overflown();
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void G1ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

G1ConcurrentMark::~G1ConcurrentMark() {
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
    }

    virtual bool doHeapRegion(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
  }

  bool is_complete() {
    return _cl.complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");

  clear_bitmap(_nextMarkBitMap, _parallel_workers, true);

  // Clear the live count data. If the marking has been aborted, the abort()
  // call already did that.
  if (!has_aborted()) {
    clear_live_data(_parallel_workers);
    DEBUG_ONLY(verify_live_data_clear());
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
  clear_bitmap(_prevMarkBitMap, workers, false);
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
  bool _error;
public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap), _error(false) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the get_next_marked_addr() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void G1ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that a Full GC or an evacuation pause could occur while
 * it is suspended. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class G1CMConcurrentMarkingTask: public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      G1CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->do_yield_check();

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
                            ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~G1CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint G1ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
                                                      1, /* Minimum workers */
                                                      parallel_marking_threads(),
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
         "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
         max_parallel_marking_threads(), n_conc_workers);
  return n_conc_workers;
}

void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;

public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr);
      hr = root_regions->claim_next();
    }
  }
};

void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
                                     // We distribute work on a per-region basis, so starting
                                     // more threads than that is useless.
                                     root_regions()->num_root_regions());
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
    _parallel_workers->run_task(&task, _parallel_marking_threads);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _parallel_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Remark Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  _gc_tracer_cm->report_object_count_after_gc(&is_alive);
}

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const uint old_regions_removed() { return _old_regions_removed; }
  const uint humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
    AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
List"); 1209 HRRSCleanupTask hrrs_cleanup_task; 1210 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1211 &hrrs_cleanup_task); 1212 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1213 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1214 1215 // Now update the lists 1216 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1217 { 1218 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1219 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1220 1221 // If we iterate over the global cleanup list at the end of 1222 // cleanup to do this printing we will not guarantee to only 1223 // generate output for the newly-reclaimed regions (the list 1224 // might not be empty at the beginning of cleanup; we might 1225 // still be working on its previous contents). So we do the 1226 // printing here, before we append the new regions to the global 1227 // cleanup list. 1228 1229 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1230 if (hr_printer->is_active()) { 1231 FreeRegionListIterator iter(&local_cleanup_list); 1232 while (iter.more_available()) { 1233 HeapRegion* hr = iter.get_next(); 1234 hr_printer->cleanup(hr); 1235 } 1236 } 1237 1238 _cleanup_list->add_ordered(&local_cleanup_list); 1239 assert(local_cleanup_list.is_empty(), "post-condition"); 1240 1241 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1242 } 1243 } 1244 }; 1245 1246 void G1ConcurrentMark::cleanup() { 1247 // world is stopped at this checkpoint 1248 assert(SafepointSynchronize::is_at_safepoint(), 1249 "world should be stopped"); 1250 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1251 1252 // If a full collection has happened, we shouldn't do this. 1253 if (has_aborted()) { 1254 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1255 return; 1256 } 1257 1258 g1h->verifier()->verify_region_sets_optional(); 1259 1260 if (VerifyDuringGC) { 1261 HandleMark hm; // handle scope 1262 g1h->prepare_for_verify(); 1263 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1264 } 1265 g1h->verifier()->check_bitmaps("Cleanup Start"); 1266 1267 G1Policy* g1p = g1h->g1_policy(); 1268 g1p->record_concurrent_mark_cleanup_start(); 1269 1270 double start = os::elapsedTime(); 1271 1272 HeapRegionRemSet::reset_for_cleanup_tasks(); 1273 1274 { 1275 GCTraceTime(Debug, gc)("Finalize Live Data"); 1276 finalize_live_data(); 1277 } 1278 1279 if (VerifyDuringGC) { 1280 GCTraceTime(Debug, gc)("Verify Live Data"); 1281 verify_live_data(); 1282 } 1283 1284 g1h->collector_state()->set_mark_in_progress(false); 1285 1286 double count_end = os::elapsedTime(); 1287 double this_final_counting_time = (count_end - start); 1288 _total_counting_time += this_final_counting_time; 1289 1290 if (log_is_enabled(Trace, gc, liveness)) { 1291 G1PrintRegionLivenessInfoClosure cl("Post-Marking"); 1292 _g1h->heap_region_iterate(&cl); 1293 } 1294 1295 // Install newly created mark bitMap as "prev". 1296 swapMarkBitMaps(); 1297 1298 g1h->reset_gc_time_stamp(); 1299 1300 uint n_workers = _g1h->workers()->active_workers(); 1301 1302 // Note end of marking in all heap regions. 1303 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); 1304 g1h->workers()->run_task(&g1_par_note_end_task); 1305 g1h->check_gc_time_stamps(); 1306 1307 if (!cleanup_list_is_empty()) { 1308 // The cleanup list is not empty, so we'll have to process it 1309 // concurrently. 
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    g1h->scrub_rem_set();
    _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end();

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
  }

  g1h->verifier()->check_bitmaps("Cleanup End");

  g1h->verifier()->verify_region_sets_optional();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();
  g1h->allocation_context_stats().update_after_mark();
}

void G1ConcurrentMark::complete_cleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                  "cleanup list has %u entries",
                                  _cleanup_list.length());

  // No one else should be accessing the _cleanup_list at this point,
  // so it is not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                      "appending %u entries to the secondary_free_list, "
                                      "cleanup list still has %u entries",
                                      tmp_free_list.length(),
                                      _cleanup_list.length());

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }
#ifndef PRODUCT
      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
#endif
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a worker thread (for serial reference
// processing the G1CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task),
    _ref_counter_limit(G1RefProcDrainInterval), _is_serial(is_serial) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure: public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  WorkGang* _workers;
  uint _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          G1ConcurrentMark* cm,
                          WorkGang* workers,
                          uint n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
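  // Both overloads delegate to a proxy AbstractGangTask (defined below)
  // that is run on the supplied marking WorkGang.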
1547 virtual void execute(ProcessTask& task); 1548 virtual void execute(EnqueueTask& task); 1549 }; 1550 1551 class G1CMRefProcTaskProxy: public AbstractGangTask { 1552 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1553 ProcessTask& _proc_task; 1554 G1CollectedHeap* _g1h; 1555 G1ConcurrentMark* _cm; 1556 1557 public: 1558 G1CMRefProcTaskProxy(ProcessTask& proc_task, 1559 G1CollectedHeap* g1h, 1560 G1ConcurrentMark* cm) : 1561 AbstractGangTask("Process reference objects in parallel"), 1562 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 1563 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1564 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 1565 } 1566 1567 virtual void work(uint worker_id) { 1568 ResourceMark rm; 1569 HandleMark hm; 1570 G1CMTask* task = _cm->task(worker_id); 1571 G1CMIsAliveClosure g1_is_alive(_g1h); 1572 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 1573 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 1574 1575 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1576 } 1577 }; 1578 1579 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 1580 assert(_workers != NULL, "Need parallel worker threads."); 1581 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1582 1583 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 1584 1585 // We need to reset the concurrency level before each 1586 // proxy task execution, so that the termination protocol 1587 // and overflow handling in G1CMTask::do_marking_step() knows 1588 // how many workers to wait for. 1589 _cm->set_concurrency(_active_workers); 1590 _workers->run_task(&proc_task_proxy); 1591 } 1592 1593 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 1594 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1595 EnqueueTask& _enq_task; 1596 1597 public: 1598 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 1599 AbstractGangTask("Enqueue reference objects in parallel"), 1600 _enq_task(enq_task) { } 1601 1602 virtual void work(uint worker_id) { 1603 _enq_task.work(worker_id); 1604 } 1605 }; 1606 1607 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 1608 assert(_workers != NULL, "Need parallel worker threads."); 1609 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1610 1611 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 1612 1613 // Not strictly necessary but... 1614 // 1615 // We need to reset the concurrency level before each 1616 // proxy task execution, so that the termination protocol 1617 // and overflow handling in G1CMTask::do_marking_step() knows 1618 // how many workers to wait for. 1619 _cm->set_concurrency(_active_workers); 1620 _workers->run_task(&enq_task_proxy); 1621 } 1622 1623 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 1624 if (has_overflown()) { 1625 // Skip processing the discovered references if we have 1626 // overflown the global marking stack. Reference objects 1627 // only get discovered once so it is OK to not 1628 // de-populate the discovered reference lists. We could have, 1629 // but the only benefit would be that, when marking restarts, 1630 // less reference objects are discovered. 1631 return; 1632 } 1633 1634 ResourceMark rm; 1635 HandleMark hm; 1636 1637 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1638 1639 // Is alive closure. 
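// (Restating the semantics defined by G1CMIsAliveClosure::do_object_b earlier
// in this file: an object is treated as alive if its address is non-NULL and
// either lies outside the G1 reserved heap or is not reported dead by
// is_obj_ill() with respect to the current marking.)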
1640 G1CMIsAliveClosure g1_is_alive(g1h); 1641 1642 // Inner scope to exclude the cleaning of the string and symbol 1643 // tables from the displayed time. 1644 { 1645 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm); 1646 1647 ReferenceProcessor* rp = g1h->ref_processor_cm(); 1648 1649 // See the comment in G1CollectedHeap::ref_processing_init() 1650 // about how reference processing currently works in G1. 1651 1652 // Set the soft reference policy 1653 rp->setup_policy(clear_all_soft_refs); 1654 assert(_global_mark_stack.is_empty(), "mark stack should be empty"); 1655 1656 // Instances of the 'Keep Alive' and 'Complete GC' closures used 1657 // in serial reference processing. Note these closures are also 1658 // used for serially processing (by the the current thread) the 1659 // JNI references during parallel reference processing. 1660 // 1661 // These closures do not need to synchronize with the worker 1662 // threads involved in parallel reference processing as these 1663 // instances are executed serially by the current thread (e.g. 1664 // reference processing is not multi-threaded and is thus 1665 // performed by the current thread instead of a gang worker). 1666 // 1667 // The gang tasks involved in parallel reference processing create 1668 // their own instances of these closures, which do their own 1669 // synchronization among themselves. 1670 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 1671 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 1672 1673 // We need at least one active thread. If reference processing 1674 // is not multi-threaded we use the current (VMThread) thread, 1675 // otherwise we use the work gang from the G1CollectedHeap and 1676 // we utilize all the worker threads we can. 1677 bool processing_is_mt = rp->processing_is_mt(); 1678 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 1679 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 1680 1681 // Parallel processing task executor. 1682 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 1683 g1h->workers(), active_workers); 1684 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 1685 1686 // Set the concurrency level. The phase was already set prior to 1687 // executing the remark task. 1688 set_concurrency(active_workers); 1689 1690 // Set the degree of MT processing here. If the discovery was done MT, 1691 // the number of threads involved during discovery could differ from 1692 // the number of active workers. This is OK as long as the discovered 1693 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 1694 rp->set_active_mt_degree(active_workers); 1695 1696 // Process the weak references. 1697 const ReferenceProcessorStats& stats = 1698 rp->process_discovered_references(&g1_is_alive, 1699 &g1_keep_alive, 1700 &g1_drain_mark_stack, 1701 executor, 1702 _gc_timer_cm); 1703 _gc_tracer_cm->report_gc_reference_stats(stats); 1704 1705 // The do_oop work routines of the keep_alive and drain_marking_stack 1706 // oop closures will set the has_overflown flag if we overflow the 1707 // global marking stack. 
1708 1709 assert(has_overflown() || _global_mark_stack.is_empty(), 1710 "Mark stack should be empty (unless it has overflown)"); 1711 1712 assert(rp->num_q() == active_workers, "why not"); 1713 1714 rp->enqueue_discovered_references(executor); 1715 1716 rp->verify_no_references_recorded(); 1717 assert(!rp->discovery_enabled(), "Post condition"); 1718 } 1719 1720 if (has_overflown()) { 1721 // We can not trust g1_is_alive if the marking stack overflowed 1722 return; 1723 } 1724 1725 assert(_global_mark_stack.is_empty(), "Marking should have completed"); 1726 1727 // Unload Klasses, String, Symbols, Code Cache, etc. 1728 if (ClassUnloadingWithConcurrentMark) { 1729 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm); 1730 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */); 1731 g1h->complete_cleaning(&g1_is_alive, purged_classes); 1732 } else { 1733 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm); 1734 // No need to clean string table and symbol table as they are treated as strong roots when 1735 // class unloading is disabled. 1736 g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled()); 1737 1738 } 1739 } 1740 1741 void G1ConcurrentMark::swapMarkBitMaps() { 1742 G1CMBitMap* temp = _prevMarkBitMap; 1743 _prevMarkBitMap = _nextMarkBitMap; 1744 _nextMarkBitMap = temp; 1745 } 1746 1747 // Closure for marking entries in SATB buffers. 1748 class G1CMSATBBufferClosure : public SATBBufferClosure { 1749 private: 1750 G1CMTask* _task; 1751 G1CollectedHeap* _g1h; 1752 1753 // This is very similar to G1CMTask::deal_with_reference, but with 1754 // more relaxed requirements for the argument, so this must be more 1755 // circumspect about treating the argument as an object. 1756 void do_entry(void* entry) const { 1757 _task->increment_refs_reached(); 1758 HeapRegion* hr = _g1h->heap_region_containing(entry); 1759 if (entry < hr->next_top_at_mark_start()) { 1760 // Until we get here, we don't know whether entry refers to a valid 1761 // object; it could instead have been a stale reference. 
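// (Objects at or above the region's NTAMS were allocated since marking
// started and are implicitly considered live in this cycle, so only
// entries below NTAMS need to be treated as objects and marked here.)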
1762 oop obj = static_cast<oop>(entry); 1763 assert(obj->is_oop(true /* ignore mark word */), 1764 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)); 1765 _task->make_reference_grey(obj); 1766 } 1767 } 1768 1769 public: 1770 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h) 1771 : _task(task), _g1h(g1h) { } 1772 1773 virtual void do_buffer(void** buffer, size_t size) { 1774 for (size_t i = 0; i < size; ++i) { 1775 do_entry(buffer[i]); 1776 } 1777 } 1778 }; 1779 1780 class G1RemarkThreadsClosure : public ThreadClosure { 1781 G1CMSATBBufferClosure _cm_satb_cl; 1782 G1CMOopClosure _cm_cl; 1783 MarkingCodeBlobClosure _code_cl; 1784 int _thread_parity; 1785 1786 public: 1787 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) : 1788 _cm_satb_cl(task, g1h), 1789 _cm_cl(g1h, g1h->concurrent_mark(), task), 1790 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 1791 _thread_parity(Threads::thread_claim_parity()) {} 1792 1793 void do_thread(Thread* thread) { 1794 if (thread->is_Java_thread()) { 1795 if (thread->claim_oops_do(true, _thread_parity)) { 1796 JavaThread* jt = (JavaThread*)thread; 1797 1798 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking, 1799 // however oops reachable from nmethods have very complex lifecycles: 1800 // * Alive if on the stack of an executing method 1801 // * Weakly reachable otherwise 1802 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 1803 // live by the SATB invariant, but other oops recorded in nmethods may behave differently. 1804 jt->nmethods_do(&_code_cl); 1805 1806 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 1807 } 1808 } else if (thread->is_VM_thread()) { 1809 if (thread->claim_oops_do(true, _thread_parity)) { 1810 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 1811 } 1812 } 1813 } 1814 }; 1815 1816 class G1CMRemarkTask: public AbstractGangTask { 1817 private: 1818 G1ConcurrentMark* _cm; 1819 public: 1820 void work(uint worker_id) { 1821 // Since all available tasks are actually started, we should 1822 // only proceed if we're supposed to be active. 1823 if (worker_id < _cm->active_tasks()) { 1824 G1CMTask* task = _cm->task(worker_id); 1825 task->record_start_time(); 1826 { 1827 ResourceMark rm; 1828 HandleMark hm; 1829 1830 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 1831 Threads::threads_do(&threads_f); 1832 } 1833 1834 do { 1835 task->do_marking_step(1000000000.0 /* something very large */, 1836 true /* do_termination */, 1837 false /* is_serial */); 1838 } while (task->has_aborted() && !_cm->has_overflown()); 1839 // If we overflow, then we do not want to restart. We instead 1840 // want to abort remark and do concurrent marking again.
1841 task->record_end_time(); 1842 } 1843 } 1844 1845 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) : 1846 AbstractGangTask("Par Remark"), _cm(cm) { 1847 _cm->terminator()->reset_for_reuse(active_workers); 1848 } 1849 }; 1850 1851 void G1ConcurrentMark::checkpointRootsFinalWork() { 1852 ResourceMark rm; 1853 HandleMark hm; 1854 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1855 1856 GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm); 1857 1858 g1h->ensure_parsability(false); 1859 1860 // This is remark, so we'll use up all active threads. 1861 uint active_workers = g1h->workers()->active_workers(); 1862 set_concurrency_and_phase(active_workers, false /* concurrent */); 1863 // Leave _parallel_marking_threads at its 1864 // value originally calculated in the G1ConcurrentMark 1865 // constructor and pass the number of active workers 1866 // through the gang in the task. 1867 1868 { 1869 StrongRootsScope srs(active_workers); 1870 1871 G1CMRemarkTask remarkTask(this, active_workers); 1872 // We will start all available threads, even if we decide that the 1873 // active_workers will be fewer. The extra ones will just bail out 1874 // immediately. 1875 g1h->workers()->run_task(&remarkTask); 1876 } 1877 1878 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1879 guarantee(has_overflown() || 1880 satb_mq_set.completed_buffers_num() == 0, 1881 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT, 1882 BOOL_TO_STR(has_overflown()), 1883 satb_mq_set.completed_buffers_num()); 1884 1885 print_stats(); 1886 } 1887 1888 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 1889 _prevMarkBitMap->clear_range(mr); 1890 } 1891 1892 HeapRegion* 1893 G1ConcurrentMark::claim_region(uint worker_id) { 1894 // "checkpoint" the finger 1895 HeapWord* finger = _finger; 1896 1897 // _heap_end will not change underneath our feet; it only changes at 1898 // yield points. 1899 while (finger < _heap_end) { 1900 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 1901 1902 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 1903 // Make sure that the reads below do not float before loading curr_region. 1904 OrderAccess::loadload(); 1905 // heap_region_containing above may return NULL as we always scan and claim 1906 // regions until the end of the heap. In this case, just jump to the next region. 1907 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 1908 1909 // Is the gap between reading the finger and doing the CAS too long? 1910 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 1911 if (res == finger && curr_region != NULL) { 1912 // we succeeded 1913 HeapWord* bottom = curr_region->bottom(); 1914 HeapWord* limit = curr_region->next_top_at_mark_start(); 1915 1916 // Notice that _finger == end cannot be guaranteed here since 1917 // someone else might have moved the finger even further. 1918 assert(_finger >= end, "the finger should have moved forward"); 1919 1920 if (limit > bottom) { 1921 return curr_region; 1922 } else { 1923 assert(limit == bottom, 1924 "the region limit should be at bottom"); 1925 // we return NULL and the caller should try calling 1926 // claim_region() again.
1927 return NULL; 1928 } 1929 } else { 1930 assert(_finger > finger, "the finger should have moved forward"); 1931 // read it again 1932 finger = _finger; 1933 } 1934 } 1935 1936 return NULL; 1937 } 1938 1939 #ifndef PRODUCT 1940 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 1941 private: 1942 G1CollectedHeap* _g1h; 1943 const char* _phase; 1944 int _info; 1945 1946 public: 1947 VerifyNoCSetOops(const char* phase, int info = -1) : 1948 _g1h(G1CollectedHeap::heap()), 1949 _phase(phase), 1950 _info(info) 1951 { } 1952 1953 void operator()(G1TaskQueueEntry task_entry) const { 1954 if (task_entry.is_array_slice()) { 1955 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1956 return; 1957 } 1958 guarantee(task_entry.obj()->is_oop(), 1959 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1960 p2i(task_entry.obj()), _phase, _info); 1961 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1962 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1963 p2i(task_entry.obj()), _phase, _info); 1964 } 1965 }; 1966 1967 void G1ConcurrentMark::verify_no_cset_oops() { 1968 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1969 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 1970 return; 1971 } 1972 1973 // Verify entries on the global mark stack 1974 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 1975 1976 // Verify entries on the task queues 1977 for (uint i = 0; i < _max_worker_id; ++i) { 1978 G1CMTaskQueue* queue = _task_queues->queue(i); 1979 queue->iterate(VerifyNoCSetOops("Queue", i)); 1980 } 1981 1982 // Verify the global finger 1983 HeapWord* global_finger = finger(); 1984 if (global_finger != NULL && global_finger < _heap_end) { 1985 // Since we always iterate over all regions, we might get a NULL HeapRegion 1986 // here. 1987 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 1988 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 1989 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 1990 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 1991 } 1992 1993 // Verify the task fingers 1994 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 1995 for (uint i = 0; i < parallel_marking_threads(); ++i) { 1996 G1CMTask* task = _tasks[i]; 1997 HeapWord* task_finger = task->finger(); 1998 if (task_finger != NULL && task_finger < _heap_end) { 1999 // See above note on the global finger verification. 
2000 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2001 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2002 !task_hr->in_collection_set(), 2003 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2004 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2005 } 2006 } 2007 } 2008 #endif // PRODUCT 2009 void G1ConcurrentMark::create_live_data() { 2010 _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap); 2011 } 2012 2013 void G1ConcurrentMark::finalize_live_data() { 2014 _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap); 2015 } 2016 2017 void G1ConcurrentMark::verify_live_data() { 2018 _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap); 2019 } 2020 2021 void G1ConcurrentMark::clear_live_data(WorkGang* workers) { 2022 _g1h->g1_rem_set()->clear_card_live_data(workers); 2023 } 2024 2025 #ifdef ASSERT 2026 void G1ConcurrentMark::verify_live_data_clear() { 2027 _g1h->g1_rem_set()->verify_card_live_data_is_clear(); 2028 } 2029 #endif 2030 2031 void G1ConcurrentMark::print_stats() { 2032 if (!log_is_enabled(Debug, gc, stats)) { 2033 return; 2034 } 2035 log_debug(gc, stats)("---------------------------------------------------------------------"); 2036 for (size_t i = 0; i < _active_tasks; ++i) { 2037 _tasks[i]->print_stats(); 2038 log_debug(gc, stats)("---------------------------------------------------------------------"); 2039 } 2040 } 2041 2042 void G1ConcurrentMark::abort() { 2043 if (!cmThread()->during_cycle() || _has_aborted) { 2044 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2045 return; 2046 } 2047 2048 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2049 // concurrent bitmap clearing. 2050 { 2051 GCTraceTime(Debug, gc)("Clear Next Bitmap"); 2052 clear_bitmap(_nextMarkBitMap, _g1h->workers(), false); 2053 } 2054 // Note we cannot clear the previous marking bitmap here 2055 // since VerifyDuringGC verifies the objects marked during 2056 // a full GC against the previous bitmap. 2057 2058 { 2059 GCTraceTime(Debug, gc)("Clear Live Data"); 2060 clear_live_data(_g1h->workers()); 2061 } 2062 DEBUG_ONLY({ 2063 GCTraceTime(Debug, gc)("Verify Live Data Clear"); 2064 verify_live_data_clear(); 2065 }) 2066 // Empty mark stack 2067 reset_marking_state(); 2068 for (uint i = 0; i < _max_worker_id; ++i) { 2069 _tasks[i]->clear_region_fields(); 2070 } 2071 _first_overflow_barrier_sync.abort(); 2072 _second_overflow_barrier_sync.abort(); 2073 _has_aborted = true; 2074 2075 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2076 satb_mq_set.abandon_partial_marking(); 2077 // This can be called either during or outside marking, we'll read 2078 // the expected_active value from the SATB queue set. 2079 satb_mq_set.set_active_all_threads( 2080 false, /* new active value */ 2081 satb_mq_set.is_active() /* expected_active */); 2082 } 2083 2084 static void print_ms_time_info(const char* prefix, const char* name, 2085 NumberSeq& ns) { 2086 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2087 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2088 if (ns.num() > 0) { 2089 log_trace(gc, marking)("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 2090 prefix, ns.sd(), ns.maximum()); 2091 } 2092 } 2093 2094 void G1ConcurrentMark::print_summary_info() { 2095 Log(gc, marking) log; 2096 if (!log.is_trace()) { 2097 return; 2098 } 2099 2100 log.trace(" Concurrent marking:"); 2101 print_ms_time_info(" ", "init marks", _init_times); 2102 print_ms_time_info(" ", "remarks", _remark_times); 2103 { 2104 print_ms_time_info(" ", "final marks", _remark_mark_times); 2105 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2106 2107 } 2108 print_ms_time_info(" ", "cleanups", _cleanup_times); 2109 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2110 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2111 if (G1ScrubRemSets) { 2112 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2113 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2114 } 2115 log.trace(" Total stop_world time = %8.2f s.", 2116 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2117 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2118 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2119 } 2120 2121 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2122 _parallel_workers->print_worker_threads_on(st); 2123 } 2124 2125 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2126 _parallel_workers->threads_do(tc); 2127 } 2128 2129 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2130 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2131 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2132 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2133 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2134 } 2135 2136 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) { 2137 assert(addr < _cm->finger(), "invariant"); 2138 assert(addr >= _task->finger(), "invariant"); 2139 2140 // We move that task's local finger along. 2141 _task->move_finger_to(addr); 2142 2143 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); 2144 // we only partially drain the local queue and global stack 2145 _task->drain_local_queue(true); 2146 _task->drain_global_stack(true); 2147 2148 // if the has_aborted flag has been raised, we need to bail out of 2149 // the iteration 2150 return !_task->has_aborted(); 2151 } 2152 2153 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2154 ReferenceProcessor* result = g1h->ref_processor_cm(); 2155 assert(result != NULL, "CM reference processor should not be NULL"); 2156 return result; 2157 } 2158 2159 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2160 G1ConcurrentMark* cm, 2161 G1CMTask* task) 2162 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2163 _g1h(g1h), _cm(cm), _task(task) 2164 { } 2165 2166 void G1CMTask::setup_for_region(HeapRegion* hr) { 2167 assert(hr != NULL, 2168 "claim_region() should have filtered out NULL regions"); 2169 _curr_region = hr; 2170 _finger = hr->bottom(); 2171 update_region_limit(); 2172 } 2173 2174 void G1CMTask::update_region_limit() { 2175 HeapRegion* hr = _curr_region; 2176 HeapWord* bottom = hr->bottom(); 2177 HeapWord* limit = hr->next_top_at_mark_start(); 2178 2179 if (limit == bottom) { 2180 // The region was collected underneath our feet. 
2181 // We set the finger to bottom to ensure that the bitmap 2182 // iteration that will follow this will not do anything. 2183 // (this is not a condition that holds when we set the region up, 2184 // as the region is not supposed to be empty in the first place) 2185 _finger = bottom; 2186 } else if (limit >= _region_limit) { 2187 assert(limit >= _finger, "peace of mind"); 2188 } else { 2189 assert(limit < _region_limit, "only way to get here"); 2190 // This can happen under some pretty unusual circumstances. An 2191 // evacuation pause empties the region underneath our feet (NTAMS 2192 // at bottom). We then do some allocation in the region (NTAMS 2193 // stays at bottom), followed by the region being used as a GC 2194 // alloc region (NTAMS will move to top() and the objects 2195 // originally below it will be grayed). All objects now marked in 2196 // the region are explicitly grayed, if below the global finger, 2197 // and we do not need in fact to scan anything else. So, we simply 2198 // set _finger to be limit to ensure that the bitmap iteration 2199 // doesn't do anything. 2200 _finger = limit; 2201 } 2202 2203 _region_limit = limit; 2204 } 2205 2206 void G1CMTask::giveup_current_region() { 2207 assert(_curr_region != NULL, "invariant"); 2208 clear_region_fields(); 2209 } 2210 2211 void G1CMTask::clear_region_fields() { 2212 // Values for these three fields that indicate that we're not 2213 // holding on to a region. 2214 _curr_region = NULL; 2215 _finger = NULL; 2216 _region_limit = NULL; 2217 } 2218 2219 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 2220 if (cm_oop_closure == NULL) { 2221 assert(_cm_oop_closure != NULL, "invariant"); 2222 } else { 2223 assert(_cm_oop_closure == NULL, "invariant"); 2224 } 2225 _cm_oop_closure = cm_oop_closure; 2226 } 2227 2228 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) { 2229 guarantee(nextMarkBitMap != NULL, "invariant"); 2230 _nextMarkBitMap = nextMarkBitMap; 2231 clear_region_fields(); 2232 2233 _calls = 0; 2234 _elapsed_time_ms = 0.0; 2235 _termination_time_ms = 0.0; 2236 _termination_start_time_ms = 0.0; 2237 } 2238 2239 bool G1CMTask::should_exit_termination() { 2240 regular_clock_call(); 2241 // This is called when we are in the termination protocol. We should 2242 // quit if, for some reason, this task wants to abort or the global 2243 // stack is not empty (this means that we can get work from it). 2244 return !_cm->mark_stack_empty() || has_aborted(); 2245 } 2246 2247 void G1CMTask::reached_limit() { 2248 assert(_words_scanned >= _words_scanned_limit || 2249 _refs_reached >= _refs_reached_limit , 2250 "shouldn't have been called otherwise"); 2251 regular_clock_call(); 2252 } 2253 2254 void G1CMTask::regular_clock_call() { 2255 if (has_aborted()) return; 2256 2257 // First, we need to recalculate the words scanned and refs reached 2258 // limits for the next clock call. 2259 recalculate_limits(); 2260 2261 // During the regular clock call we do the following 2262 2263 // (1) If an overflow has been flagged, then we abort. 2264 if (_cm->has_overflown()) { 2265 set_has_aborted(); 2266 return; 2267 } 2268 2269 // If we are not concurrent (i.e. we're doing remark) we don't need 2270 // to check anything else. The other steps are only needed during 2271 // the concurrent marking phase. 2272 if (!concurrent()) return; 2273 2274 // (2) If marking has been aborted for Full GC, then we also abort. 
2275 if (_cm->has_aborted()) { 2276 set_has_aborted(); 2277 return; 2278 } 2279 2280 double curr_time_ms = os::elapsedVTime() * 1000.0; 2281 2282 // (3) We check whether we should yield. If we have to, then we abort. 2283 if (SuspendibleThreadSet::should_yield()) { 2284 // We should yield. To do this we abort the task. The caller is 2285 // responsible for yielding. 2286 set_has_aborted(); 2287 return; 2288 } 2289 2290 // (4) We check whether we've reached our time quota. If we have, 2291 // then we abort. 2292 double elapsed_time_ms = curr_time_ms - _start_time_ms; 2293 if (elapsed_time_ms > _time_target_ms) { 2294 set_has_aborted(); 2295 _has_timed_out = true; 2296 return; 2297 } 2298 2299 // (5) Finally, we check whether there are enough completed SATB 2300 // buffers available for processing. If there are, we abort. 2301 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2302 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2303 // We do need to process SATB buffers; we'll abort and restart 2304 // the marking task to do so. 2305 set_has_aborted(); 2306 return; 2307 } 2308 } 2309 2310 void G1CMTask::recalculate_limits() { 2311 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2312 _words_scanned_limit = _real_words_scanned_limit; 2313 2314 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2315 _refs_reached_limit = _real_refs_reached_limit; 2316 } 2317 2318 void G1CMTask::decrease_limits() { 2319 // This is called when we believe that we're going to do an infrequent 2320 // operation which will increase the per-byte scanned cost (i.e. move 2321 // entries to/from the global stack). It basically tries to decrease the 2322 // scanning limit so that the clock is called earlier. 2323 2324 _words_scanned_limit = _real_words_scanned_limit - 2325 3 * words_scanned_period / 4; 2326 _refs_reached_limit = _real_refs_reached_limit - 2327 3 * refs_reached_period / 4; 2328 } 2329 2330 void G1CMTask::move_entries_to_global_stack() { 2331 // Local array where we'll store the entries that will be popped 2332 // from the local queue. 2333 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2334 2335 size_t n = 0; 2336 G1TaskQueueEntry task_entry; 2337 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) { 2338 buffer[n] = task_entry; 2339 ++n; 2340 } 2341 if (n < G1CMMarkStack::EntriesPerChunk) { 2342 buffer[n] = G1TaskQueueEntry(); 2343 } 2344 2345 if (n > 0) { 2346 if (!_cm->mark_stack_push(buffer)) { 2347 set_has_aborted(); 2348 } 2349 } 2350 2351 // This operation was quite expensive, so decrease the limits. 2352 decrease_limits(); 2353 } 2354 2355 bool G1CMTask::get_entries_from_global_stack() { 2356 // Local array where we'll store the entries that will be popped 2357 // from the global stack. 2358 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2359 2360 if (!_cm->mark_stack_pop(buffer)) { 2361 return false; 2362 } 2363 2364 // We did actually pop at least one entry. 2365 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) { 2366 G1TaskQueueEntry task_entry = buffer[i]; 2367 if (task_entry.is_null()) { 2368 break; 2369 } 2370 assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj())); 2371 bool success = _task_queue->push(task_entry); 2372 // We only call this when the local queue is empty or under a 2373 // given target limit. So, we do not expect this push to fail.
2374 assert(success, "invariant"); 2375 } 2376 2377 // This operation was quite expensive, so decrease the limits. 2378 decrease_limits(); 2379 return true; 2380 } 2381 2382 void G1CMTask::drain_local_queue(bool partially) { 2383 if (has_aborted()) { 2384 return; 2385 } 2386 2387 // Decide what the target size is, depending on whether we're going to 2388 // drain it partially (so that other tasks can steal if they run out 2389 // of things to do) or totally (at the very end). 2390 size_t target_size; 2391 if (partially) { 2392 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 2393 } else { 2394 target_size = 0; 2395 } 2396 2397 if (_task_queue->size() > target_size) { 2398 G1TaskQueueEntry entry; 2399 bool ret = _task_queue->pop_local(entry); 2400 while (ret) { 2401 scan_task_entry(entry); 2402 if (_task_queue->size() <= target_size || has_aborted()) { 2403 ret = false; 2404 } else { 2405 ret = _task_queue->pop_local(entry); 2406 } 2407 } 2408 } 2409 } 2410 2411 void G1CMTask::drain_global_stack(bool partially) { 2412 if (has_aborted()) return; 2413 2414 // We have a policy to drain the local queue before we attempt to 2415 // drain the global stack. 2416 assert(partially || _task_queue->size() == 0, "invariant"); 2417 2418 // Decide what the target size is, depending on whether we're going to 2419 // drain it partially (so that other tasks can steal if they run out 2420 // of things to do) or totally (at the very end). 2421 // Notice that when draining the global mark stack partially, due to the raciness 2422 // of the mark stack size update we might in fact drop below the target. But, 2423 // this is not a problem. 2424 // In case of total draining, we simply process until the global mark stack is 2425 // totally empty, disregarding the size counter. 2426 if (partially) { 2427 size_t const target_size = _cm->partial_mark_stack_size_target(); 2428 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 2429 if (get_entries_from_global_stack()) { 2430 drain_local_queue(partially); 2431 } 2432 } 2433 } else { 2434 while (!has_aborted() && get_entries_from_global_stack()) { 2435 drain_local_queue(partially); 2436 } 2437 } 2438 } 2439 2440 // The SATB queue set has several assumptions about whether to call the par or 2441 // non-par versions of its methods; this is why some of the code is 2442 // replicated. We should really get rid of the single-threaded version 2443 // of the code to simplify things. 2444 void G1CMTask::drain_satb_buffers() { 2445 if (has_aborted()) return; 2446 2447 // We set this so that the regular clock knows that we're in the 2448 // middle of draining buffers and doesn't set the abort flag when it 2449 // notices that SATB buffers are available for draining. It'd be 2450 // very counterproductive if it did that. :-) 2451 _draining_satb_buffers = true; 2452 2453 G1CMSATBBufferClosure satb_cl(this, _g1h); 2454 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2455 2456 // This keeps claiming and applying the closure to completed buffers 2457 // until we run out of buffers or we need to abort.
2458 while (!has_aborted() && 2459 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2460 regular_clock_call(); 2461 } 2462 2463 _draining_satb_buffers = false; 2464 2465 assert(has_aborted() || 2466 concurrent() || 2467 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2468 2469 // again, this was a potentially expensive operation, decrease the 2470 // limits to get the regular clock call early 2471 decrease_limits(); 2472 } 2473 2474 void G1CMTask::print_stats() { 2475 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 2476 _worker_id, _calls); 2477 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2478 _elapsed_time_ms, _termination_time_ms); 2479 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 2480 _step_times_ms.num(), _step_times_ms.avg(), 2481 _step_times_ms.sd()); 2482 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 2483 _step_times_ms.maximum(), _step_times_ms.sum()); 2484 } 2485 2486 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2487 return _task_queues->steal(worker_id, hash_seed, task_entry); 2488 } 2489 2490 /***************************************************************************** 2491 2492 The do_marking_step(time_target_ms, ...) method is the building 2493 block of the parallel marking framework. It can be called in parallel 2494 with other invocations of do_marking_step() on different tasks 2495 (but only one per task, obviously) and concurrently with the 2496 mutator threads, or during remark, hence it eliminates the need 2497 for two versions of the code. When called during remark, it will 2498 pick up from where the task left off during the concurrent marking 2499 phase. Interestingly, tasks are also claimable during evacuation 2500 pauses too, since do_marking_step() ensures that it aborts before 2501 it needs to yield. 2502 2503 The data structures that it uses to do marking work are the 2504 following: 2505 2506 (1) Marking Bitmap. If there are gray objects that appear only 2507 on the bitmap (this happens either when dealing with an overflow 2508 or when the initial marking phase has simply marked the roots 2509 and didn't push them on the stack), then tasks claim heap 2510 regions whose bitmap they then scan to find gray objects. A 2511 global finger indicates where the end of the last claimed region 2512 is. A local finger indicates how far into the region a task has 2513 scanned. The two fingers are used to determine how to gray an 2514 object (i.e. whether simply marking it is OK, as it will be 2515 visited by a task in the future, or whether it needs to be also 2516 pushed on a stack). 2517 2518 (2) Local Queue. The local queue of the task which is accessed 2519 reasonably efficiently by the task. Other tasks can steal from 2520 it when they run out of work. Throughout the marking phase, a 2521 task attempts to keep its local queue short but not totally 2522 empty, so that entries are available for stealing by other 2523 tasks. Only when there is no more work, a task will totally 2524 drain its local queue. 2525 2526 (3) Global Mark Stack. This handles local queue overflow. During 2527 marking only sets of entries are moved between it and the local 2528 queues, as access to it requires a mutex and more fine-grain 2529 interaction with it which might cause contention. If it 2530 overflows, then the marking phase should restart and iterate 2531 over the bitmap to identify gray objects. 
Throughout the marking 2532 phase, tasks attempt to keep the global mark stack at a small 2533 length but not totally empty, so that entries are available for 2534 popping by other tasks. Only when there is no more work will tasks 2535 totally drain the global mark stack. 2536 2537 (4) SATB Buffer Queue. This is where completed SATB buffers are 2538 made available. Buffers are regularly removed from this queue 2539 and scanned for roots, so that the queue doesn't get too 2540 long. During remark, all completed buffers are processed, as 2541 well as the filled in parts of any uncompleted buffers. 2542 2543 The do_marking_step() method tries to abort when the time target 2544 has been reached. There are a few other cases when the 2545 do_marking_step() method also aborts: 2546 2547 (1) When the marking phase has been aborted (after a Full GC). 2548 2549 (2) When a global overflow (on the global stack) has been 2550 triggered. Before the task aborts, it will actually sync up with 2551 the other tasks to ensure that all the marking data structures 2552 (local queues, stacks, fingers etc.) are re-initialized so that 2553 when do_marking_step() completes, the marking phase can 2554 immediately restart. 2555 2556 (3) When enough completed SATB buffers are available. The 2557 do_marking_step() method only tries to drain SATB buffers right 2558 at the beginning. So, if enough buffers are available, the 2559 marking step aborts and the SATB buffers are processed at 2560 the beginning of the next invocation. 2561 2562 (4) To yield. When we have to yield, we abort and do the actual 2563 yielding right at the end of do_marking_step(). This saves us a lot 2564 of hassle, as yielding might allow a Full GC; if that 2565 happens then objects will be compacted underneath our feet, the 2566 heap might shrink, etc. We avoid checking for all of this by just 2567 aborting and yielding right at the end. 2568 2569 From the above it follows that the do_marking_step() method should 2570 be called in a loop (or, otherwise, regularly) until it completes. 2571 2572 If a marking step completes without its has_aborted() flag being 2573 true, it means it has completed the current marking phase (and 2574 also all other marking tasks have done so and have all synced up). 2575 2576 A method called regular_clock_call() is invoked "regularly" (at 2577 sub-ms intervals) throughout marking. It is this clock method that 2578 checks all the abort conditions which were mentioned above and 2579 decides when the task should abort. A work-based scheme is used to 2580 trigger this clock method: it is called when the number of object words the 2581 marking phase has scanned or the number of references the marking 2582 phase has visited reaches a given limit. Additional invocations of 2583 the clock method have been planted in a few other strategic places 2584 too. The initial reason for the clock method was to avoid calling 2585 vtime too regularly, as it is quite expensive. So, once it was in 2586 place, it was natural to piggy-back all the other conditions on it 2587 too and not constantly check them throughout the code. 2588 2589 If do_termination is true then do_marking_step will enter its 2590 termination protocol. 2591 2592 The value of is_serial must be true when do_marking_step is being 2593 called serially (i.e. by the VMThread) and do_marking_step should 2594 skip any synchronization in the termination and overflow code. 2595 Examples include the serial remark code and the serial reference 2596 processing closures.
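   For reference, the callers in this file (both serial and parallel, e.g.
   G1CMKeepAliveAndDrainClosure, G1CMDrainMarkingStackClosure and
   G1CMRemarkTask) wrap the call in a retry loop of roughly this shape,
   where task and cm stand for the G1CMTask and G1ConcurrentMark involved
   and target_ms for the chosen time target:

     do {
       task->do_marking_step(target_ms, do_termination, is_serial);
     } while (task->has_aborted() && !cm->has_overflown());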
2597 2598 The value of is_serial must be false when do_marking_step is 2599 being called by any of the worker threads in a work gang. 2600 Examples include the concurrent marking code (CMMarkingTask), 2601 the MT remark code, and the MT reference processing closures. 2602 2603 *****************************************************************************/ 2604 2605 void G1CMTask::do_marking_step(double time_target_ms, 2606 bool do_termination, 2607 bool is_serial) { 2608 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2609 assert(concurrent() == _cm->concurrent(), "they should be the same"); 2610 2611 G1Policy* g1_policy = _g1h->g1_policy(); 2612 assert(_task_queues != NULL, "invariant"); 2613 assert(_task_queue != NULL, "invariant"); 2614 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant"); 2615 2616 assert(!_claimed, 2617 "only one thread should claim this task at any one time"); 2618 2619 // OK, this doesn't safeguard again all possible scenarios, as it is 2620 // possible for two threads to set the _claimed flag at the same 2621 // time. But it is only for debugging purposes anyway and it will 2622 // catch most problems. 2623 _claimed = true; 2624 2625 _start_time_ms = os::elapsedVTime() * 1000.0; 2626 2627 // If do_stealing is true then do_marking_step will attempt to 2628 // steal work from the other G1CMTasks. It only makes sense to 2629 // enable stealing when the termination protocol is enabled 2630 // and do_marking_step() is not being called serially. 2631 bool do_stealing = do_termination && !is_serial; 2632 2633 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2634 _time_target_ms = time_target_ms - diff_prediction_ms; 2635 2636 // set up the variables that are used in the work-based scheme to 2637 // call the regular clock method 2638 _words_scanned = 0; 2639 _refs_reached = 0; 2640 recalculate_limits(); 2641 2642 // clear all flags 2643 clear_has_aborted(); 2644 _has_timed_out = false; 2645 _draining_satb_buffers = false; 2646 2647 ++_calls; 2648 2649 // Set up the bitmap and oop closures. Anything that uses them is 2650 // eventually called from this method, so it is OK to allocate these 2651 // statically. 2652 G1CMBitMapClosure bitmap_closure(this, _cm); 2653 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 2654 set_cm_oop_closure(&cm_oop_closure); 2655 2656 if (_cm->has_overflown()) { 2657 // This can happen if the mark stack overflows during a GC pause 2658 // and this task, after a yield point, restarts. We have to abort 2659 // as we need to get into the overflow protocol which happens 2660 // right at the end of this task. 2661 set_has_aborted(); 2662 } 2663 2664 // First drain any available SATB buffers. After this, we will not 2665 // look at SATB buffers before the next invocation of this method. 2666 // If enough completed SATB buffers are queued up, the regular clock 2667 // will abort this task so that it restarts. 2668 drain_satb_buffers(); 2669 // ...then partially drain the local queue and the global stack 2670 drain_local_queue(true); 2671 drain_global_stack(true); 2672 2673 do { 2674 if (!has_aborted() && _curr_region != NULL) { 2675 // This means that we're already holding on to a region. 2676 assert(_finger != NULL, "if region is not NULL, then the finger " 2677 "should not be NULL either"); 2678 2679 // We might have restarted this task after an evacuation pause 2680 // which might have evacuated the region we're holding on to 2681 // underneath our feet. 
Let's read its limit again to make sure 2682 // that we do not iterate over a region of the heap that 2683 // contains garbage (update_region_limit() will also move 2684 // _finger to the start of the region if it is found empty). 2685 update_region_limit(); 2686 // We will start from _finger not from the start of the region, 2687 // as we might be restarting this task after aborting half-way 2688 // through scanning this region. In this case, _finger points to 2689 // the address where we last found a marked object. If this is a 2690 // fresh region, _finger points to start(). 2691 MemRegion mr = MemRegion(_finger, _region_limit); 2692 2693 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2694 "humongous regions should go around loop once only"); 2695 2696 // Some special cases: 2697 // If the memory region is empty, we can just give up the region. 2698 // If the current region is humongous then we only need to check 2699 // the bitmap for the bit associated with the start of the object, 2700 // scan the object if it's live, and give up the region. 2701 // Otherwise, let's iterate over the bitmap of the part of the region 2702 // that is left. 2703 // If the iteration is successful, give up the region. 2704 if (mr.is_empty()) { 2705 giveup_current_region(); 2706 regular_clock_call(); 2707 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2708 if (_nextMarkBitMap->is_marked(mr.start())) { 2709 // The object is marked - apply the closure 2710 bitmap_closure.do_addr(mr.start()); 2711 } 2712 // Even if this task aborted while scanning the humongous object 2713 // we can (and should) give up the current region. 2714 giveup_current_region(); 2715 regular_clock_call(); 2716 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 2717 giveup_current_region(); 2718 regular_clock_call(); 2719 } else { 2720 assert(has_aborted(), "currently the only way to do so"); 2721 // The only way to abort the bitmap iteration is to return 2722 // false from the do_bit() method. However, inside the 2723 // do_bit() method we move the _finger to point to the 2724 // object currently being looked at. So, if we bail out, we 2725 // have definitely set _finger to something non-null. 2726 assert(_finger != NULL, "invariant"); 2727 2728 // Region iteration was actually aborted. So now _finger 2729 // points to the address of the object we last scanned. If we 2730 // leave it there, when we restart this task, we will rescan 2731 // the object. It is easy to avoid this. We move the finger by 2732 // enough to point to the next possible object header (the 2733 // bitmap knows by how much we need to move it as it knows its 2734 // granularity). 2735 assert(_finger < _region_limit, "invariant"); 2736 HeapWord* new_finger = _nextMarkBitMap->addr_after_obj(_finger); 2737 // Check if bitmap iteration was aborted while scanning the last object 2738 if (new_finger >= _region_limit) { 2739 giveup_current_region(); 2740 } else { 2741 move_finger_to(new_finger); 2742 } 2743 } 2744 } 2745 // At this point we have either completed iterating over the 2746 // region we were holding on to, or we have aborted. 2747 2748 // We then partially drain the local queue and the global stack. 2749 // (Do we really need this?) 
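// (These partial drains help keep the local queue and the global stack
//  short, so that entries remain available for other tasks to steal or pop,
//  per the policy described in the large comment above do_marking_step().)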
2750 drain_local_queue(true); 2751 drain_global_stack(true); 2752 2753 // Read the note on the claim_region() method on why it might 2754 // return NULL with potentially more regions available for 2755 // claiming and why we have to check out_of_regions() to determine 2756 // whether we're done or not. 2757 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2758 // We are going to try to claim a new region. We should have 2759 // given up on the previous one. 2760 // Separated the asserts so that we know which one fires. 2761 assert(_curr_region == NULL, "invariant"); 2762 assert(_finger == NULL, "invariant"); 2763 assert(_region_limit == NULL, "invariant"); 2764 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2765 if (claimed_region != NULL) { 2766 // Yes, we managed to claim one 2767 setup_for_region(claimed_region); 2768 assert(_curr_region == claimed_region, "invariant"); 2769 } 2770 // It is important to call the regular clock here. It might take 2771 // a while to claim a region if, for example, we hit a large 2772 // block of empty regions. So we need to call the regular clock 2773 // method once round the loop to make sure it's called 2774 // frequently enough. 2775 regular_clock_call(); 2776 } 2777 2778 if (!has_aborted() && _curr_region == NULL) { 2779 assert(_cm->out_of_regions(), 2780 "at this point we should be out of regions"); 2781 } 2782 } while ( _curr_region != NULL && !has_aborted()); 2783 2784 if (!has_aborted()) { 2785 // We cannot check whether the global stack is empty, since other 2786 // tasks might be pushing objects to it concurrently. 2787 assert(_cm->out_of_regions(), 2788 "at this point we should be out of regions"); 2789 // Try to reduce the number of available SATB buffers so that 2790 // remark has less work to do. 2791 drain_satb_buffers(); 2792 } 2793 2794 // Since we've done everything else, we can now totally drain the 2795 // local queue and global stack. 2796 drain_local_queue(false); 2797 drain_global_stack(false); 2798 2799 // Attempt at work stealing from other task's queues. 2800 if (do_stealing && !has_aborted()) { 2801 // We have not aborted. This means that we have finished all that 2802 // we could. Let's try to do some stealing... 2803 2804 // We cannot check whether the global stack is empty, since other 2805 // tasks might be pushing objects to it concurrently. 2806 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2807 "only way to reach here"); 2808 while (!has_aborted()) { 2809 G1TaskQueueEntry entry; 2810 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2811 scan_task_entry(entry); 2812 2813 // And since we're towards the end, let's totally drain the 2814 // local queue and global stack. 2815 drain_local_queue(false); 2816 drain_global_stack(false); 2817 } else { 2818 break; 2819 } 2820 } 2821 } 2822 2823 // We still haven't aborted. Now, let's try to get into the 2824 // termination protocol. 2825 if (do_termination && !has_aborted()) { 2826 // We cannot check whether the global stack is empty, since other 2827 // tasks might be concurrently pushing objects on it. 2828 // Separated the asserts so that we know which one fires. 
2829 assert(_cm->out_of_regions(), "only way to reach here"); 2830 assert(_task_queue->size() == 0, "only way to reach here"); 2831 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2832 2833 // The G1CMTask class also extends the TerminatorTerminator class, 2834 // hence its should_exit_termination() method will also decide 2835 // whether to exit the termination protocol or not. 2836 bool finished = (is_serial || 2837 _cm->terminator()->offer_termination(this)); 2838 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2839 _termination_time_ms += 2840 termination_end_time_ms - _termination_start_time_ms; 2841 2842 if (finished) { 2843 // We're all done. 2844 2845 if (_worker_id == 0) { 2846 // let's allow task 0 to do this 2847 if (concurrent()) { 2848 assert(_cm->concurrent_marking_in_progress(), "invariant"); 2849 // we need to set this to false before the next 2850 // safepoint. This way we ensure that the marking phase 2851 // doesn't observe any more heap expansions. 2852 _cm->clear_concurrent_marking_in_progress(); 2853 } 2854 } 2855 2856 // We can now guarantee that the global stack is empty, since 2857 // all other tasks have finished. We separated the guarantees so 2858 // that, if a condition is false, we can immediately find out 2859 // which one. 2860 guarantee(_cm->out_of_regions(), "only way to reach here"); 2861 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2862 guarantee(_task_queue->size() == 0, "only way to reach here"); 2863 guarantee(!_cm->has_overflown(), "only way to reach here"); 2864 } else { 2865 // Apparently there's more work to do. Let's abort this task. It 2866 // will restart it and we can hopefully find more things to do. 2867 set_has_aborted(); 2868 } 2869 } 2870 2871 // Mainly for debugging purposes to make sure that a pointer to the 2872 // closure which was statically allocated in this frame doesn't 2873 // escape it by accident. 2874 set_cm_oop_closure(NULL); 2875 double end_time_ms = os::elapsedVTime() * 1000.0; 2876 double elapsed_time_ms = end_time_ms - _start_time_ms; 2877 // Update the step history. 2878 _step_times_ms.add(elapsed_time_ms); 2879 2880 if (has_aborted()) { 2881 // The task was aborted for some reason. 2882 if (_has_timed_out) { 2883 double diff_ms = elapsed_time_ms - _time_target_ms; 2884 // Keep statistics of how well we did with respect to hitting 2885 // our target only if we actually timed out (if we aborted for 2886 // other reasons, then the results might get skewed). 2887 _marking_step_diffs_ms.add(diff_ms); 2888 } 2889 2890 if (_cm->has_overflown()) { 2891 // This is the interesting one. We aborted because a global 2892 // overflow was raised. This means we have to restart the 2893 // marking phase and start iterating over regions. However, in 2894 // order to do this we have to make sure that all tasks stop 2895 // what they are doing and re-initialize in a safe manner. We 2896 // will achieve this with the use of two barrier sync points. 2897 2898 if (!is_serial) { 2899 // We only need to enter the sync barrier if being called 2900 // from a parallel context 2901 _cm->enter_first_sync_barrier(_worker_id); 2902 2903 // When we exit this sync barrier we know that all tasks have 2904 // stopped doing marking work. So, it's now safe to 2905 // re-initialize our data structures. At the end of this method, 2906 // task 0 will clear the global data structures. 2907 } 2908 2909 // We clear the local state of this task... 
2910 clear_region_fields(); 2911 2912 if (!is_serial) { 2913 // ...and enter the second barrier. 2914 _cm->enter_second_sync_barrier(_worker_id); 2915 } 2916 // At this point, if we're during the concurrent phase of 2917 // marking, everything has been re-initialized and we're 2918 // ready to restart. 2919 } 2920 } 2921 2922 _claimed = false; 2923 } 2924 2925 G1CMTask::G1CMTask(uint worker_id, 2926 G1ConcurrentMark* cm, 2927 G1CMTaskQueue* task_queue, 2928 G1CMTaskQueueSet* task_queues) 2929 : _g1h(G1CollectedHeap::heap()), 2930 _worker_id(worker_id), _cm(cm), 2931 _objArray_processor(this), 2932 _claimed(false), 2933 _nextMarkBitMap(NULL), _hash_seed(17), 2934 _task_queue(task_queue), 2935 _task_queues(task_queues), 2936 _cm_oop_closure(NULL) { 2937 guarantee(task_queue != NULL, "invariant"); 2938 guarantee(task_queues != NULL, "invariant"); 2939 2940 _marking_step_diffs_ms.add(0.5); 2941 } 2942 2943 // These are formatting macros that are used below to ensure 2944 // consistent formatting. The *_H_* versions are used to format the 2945 // header for a particular value and they should be kept consistent 2946 // with the corresponding macro. Also note that most of the macros add 2947 // the necessary white space (as a prefix) which makes them a bit 2948 // easier to compose. 2949 2950 // All the output lines are prefixed with this string to be able to 2951 // identify them easily in a large log file. 2952 #define G1PPRL_LINE_PREFIX "###" 2953 2954 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 2955 #ifdef _LP64 2956 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 2957 #else // _LP64 2958 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 2959 #endif // _LP64 2960 2961 // For per-region info 2962 #define G1PPRL_TYPE_FORMAT " %-4s" 2963 #define G1PPRL_TYPE_H_FORMAT " %4s" 2964 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 2965 #define G1PPRL_BYTE_H_FORMAT " %9s" 2966 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 2967 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 2968 2969 // For summary info 2970 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 2971 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 2972 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 2973 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 2974 2975 G1PrintRegionLivenessInfoClosure:: 2976 G1PrintRegionLivenessInfoClosure(const char* phase_name) 2977 : _total_used_bytes(0), _total_capacity_bytes(0), 2978 _total_prev_live_bytes(0), _total_next_live_bytes(0), 2979 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 2980 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2981 MemRegion g1_reserved = g1h->g1_reserved(); 2982 double now = os::elapsedTime(); 2983 2984 // Print the header of the output. 
2985 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 2986 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 2987 G1PPRL_SUM_ADDR_FORMAT("reserved") 2988 G1PPRL_SUM_BYTE_FORMAT("region-size"), 2989 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 2990 HeapRegion::GrainBytes); 2991 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2992 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2993 G1PPRL_TYPE_H_FORMAT 2994 G1PPRL_ADDR_BASE_H_FORMAT 2995 G1PPRL_BYTE_H_FORMAT 2996 G1PPRL_BYTE_H_FORMAT 2997 G1PPRL_BYTE_H_FORMAT 2998 G1PPRL_DOUBLE_H_FORMAT 2999 G1PPRL_BYTE_H_FORMAT 3000 G1PPRL_BYTE_H_FORMAT, 3001 "type", "address-range", 3002 "used", "prev-live", "next-live", "gc-eff", 3003 "remset", "code-roots"); 3004 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3005 G1PPRL_TYPE_H_FORMAT 3006 G1PPRL_ADDR_BASE_H_FORMAT 3007 G1PPRL_BYTE_H_FORMAT 3008 G1PPRL_BYTE_H_FORMAT 3009 G1PPRL_BYTE_H_FORMAT 3010 G1PPRL_DOUBLE_H_FORMAT 3011 G1PPRL_BYTE_H_FORMAT 3012 G1PPRL_BYTE_H_FORMAT, 3013 "", "", 3014 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 3015 "(bytes)", "(bytes)"); 3016 } 3017 3018 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 3019 const char* type = r->get_type_str(); 3020 HeapWord* bottom = r->bottom(); 3021 HeapWord* end = r->end(); 3022 size_t capacity_bytes = r->capacity(); 3023 size_t used_bytes = r->used(); 3024 size_t prev_live_bytes = r->live_bytes(); 3025 size_t next_live_bytes = r->next_live_bytes(); 3026 double gc_eff = r->gc_efficiency(); 3027 size_t remset_bytes = r->rem_set()->mem_size(); 3028 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 3029 3030 _total_used_bytes += used_bytes; 3031 _total_capacity_bytes += capacity_bytes; 3032 _total_prev_live_bytes += prev_live_bytes; 3033 _total_next_live_bytes += next_live_bytes; 3034 _total_remset_bytes += remset_bytes; 3035 _total_strong_code_roots_bytes += strong_code_roots_bytes; 3036 3037 // Print a line for this particular region. 3038 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3039 G1PPRL_TYPE_FORMAT 3040 G1PPRL_ADDR_BASE_FORMAT 3041 G1PPRL_BYTE_FORMAT 3042 G1PPRL_BYTE_FORMAT 3043 G1PPRL_BYTE_FORMAT 3044 G1PPRL_DOUBLE_FORMAT 3045 G1PPRL_BYTE_FORMAT 3046 G1PPRL_BYTE_FORMAT, 3047 type, p2i(bottom), p2i(end), 3048 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 3049 remset_bytes, strong_code_roots_bytes); 3050 3051 return false; 3052 } 3053 3054 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 3055 // add static memory usages to remembered set sizes 3056 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3057 // Print the footer of the output. 3058 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3059 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3060 " SUMMARY" 3061 G1PPRL_SUM_MB_FORMAT("capacity") 3062 G1PPRL_SUM_MB_PERC_FORMAT("used") 3063 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3064 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3065 G1PPRL_SUM_MB_FORMAT("remset") 3066 G1PPRL_SUM_MB_FORMAT("code-roots"), 3067 bytes_to_mb(_total_capacity_bytes), 3068 bytes_to_mb(_total_used_bytes), 3069 perc(_total_used_bytes, _total_capacity_bytes), 3070 bytes_to_mb(_total_prev_live_bytes), 3071 perc(_total_prev_live_bytes, _total_capacity_bytes), 3072 bytes_to_mb(_total_next_live_bytes), 3073 perc(_total_next_live_bytes, _total_capacity_bytes), 3074 bytes_to_mb(_total_remset_bytes), 3075 bytes_to_mb(_total_strong_code_roots_bytes)); 3076 }