/*
 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1CardLiveData.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/growableArray.hpp"

// Concurrent marking bit map wrapper

G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  assert(limit != NULL, "limit must not be NULL");
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

#ifndef PRODUCT
bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize == heap_rs.word_size();
}
#endif

void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t G1CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t G1CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm = BitMapView((BitMap::bm_word_t*) storage->reserved().start(), _bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clear_range(mr);
}

void G1CMBitMap::clear_range(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0),
  _should_expand(false) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::allocate_or_null(new_capacity);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();
  _should_expand = false;

  return true;
}

size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}

bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}

void G1CMMarkStack::expand() {
  // Clear expansion flag
  _should_expand = false;

  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                    old_capacity, new_capacity);
  }
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
  }
}

void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
  elem->next = *list;
  *list = elem;
}

void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
  TaskQueueEntryChunk* result = *list;
  if (result != NULL) {
    *list = (*list)->next;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != NULL) {
    _chunks_in_chunk_list--;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk*
G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
  // wraparound of _hwm.
  if (_hwm >= _chunk_capacity) {
    return NULL;
  }

  size_t cur_idx = Atomic::add(1, &_hwm) - 1;
  if (cur_idx >= _chunk_capacity) {
    return NULL;
  }

  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
  result->next = NULL;
  return result;
}

bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == NULL) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}

G1CMRootRegions::G1CMRootRegions() :
  _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}

void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_list("Cleanup List"),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _global_mark_stack(),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / os::initial_active_processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void G1ConcurrentMark::reset_marking_state() {
  _global_mark_stack.set_should_expand(has_overflown());
  _global_mark_stack.set_empty();
  clear_has_overflown();
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void G1ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

G1ConcurrentMark::~G1ConcurrentMark() {
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) {
    }

    virtual bool doHeapRegion(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product.
        // However, we will have them as guarantees at the beginning / end of
        // the bitmap clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer, true);
  }

  bool is_complete() {
    return _cl.complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_size_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");

  clear_bitmap(_nextMarkBitMap, _parallel_workers, true);

  // Clear the live count data. If the marking has been aborted, the abort()
  // call already did that.
  if (!has_aborted()) {
    clear_live_data(_parallel_workers);
    DEBUG_ONLY(verify_live_data_clear());
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
  clear_bitmap((G1CMBitMap*)_prevMarkBitMap, workers, false);
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
  bool _error;
public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1Policy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void G1ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an evacuation
 * pause to occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class G1CMConcurrentMarkingTask: public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      G1CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check();

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() ||
                _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
                            ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~G1CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint G1ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
                                                      1, /* Minimum workers */
                                                      parallel_marking_threads(),
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
         "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
         max_parallel_marking_threads(), n_conc_workers);
  return n_conc_workers;
}

void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;

public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr);
      hr = root_regions->claim_next();
    }
  }
};

void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
                                     // We distribute work on a per-region basis, so starting
                                     // more threads than that is useless.
                                     root_regions()->num_root_regions());
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
    _parallel_workers->run_task(&task, _parallel_marking_threads);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _parallel_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Remark Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_global_mark_stack.should_expand()) {
    _global_mark_stack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  _gc_tracer_cm->report_object_count_after_gc(&is_alive);
}

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const uint old_regions_removed() { return _old_regions_removed; }
  const uint humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
    AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents).
      // So we do the printing here, before we append the new regions
      // to the global cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

void G1ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  g1h->verifier()->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Cleanup Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  {
    GCTraceTime(Debug, gc)("Finalize Live Data");
    finalize_live_data();
  }

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc)("Verify Live Data");
    verify_live_data();
  }

  g1h->collector_state()->set_mark_in_progress(false);

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  uint n_workers = _g1h->workers()->active_workers();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
  g1h->workers()->run_task(&g1_par_note_end_task);
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    g1h->scrub_rem_set();
    _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end();

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
  }

  g1h->verifier()->check_bitmaps("Cleanup End");

  g1h->verifier()->verify_region_sets_optional();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();
  g1h->allocation_context_stats().update_after_mark();
}

void G1ConcurrentMark::complete_cleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                  "cleanup list has %u entries",
                                  _cleanup_list.length());

  // No one else should be accessing the _cleanup_list at this point,
  // so it is not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                      "appending %u entries to the secondary_free_list, "
                                      "cleanup list still has %u entries",
                                      tmp_free_list.length(),
                                      _cleanup_list.length());

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }
#ifndef PRODUCT
      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
#endif
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a worker thread (for serial reference
// processing the G1CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// state. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMtask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure: public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  WorkGang* _workers;
  uint _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          G1ConcurrentMark* cm,
                          WorkGang* workers,
                          uint n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
1579 virtual void execute(ProcessTask& task); 1580 virtual void execute(EnqueueTask& task); 1581 }; 1582 1583 class G1CMRefProcTaskProxy: public AbstractGangTask { 1584 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1585 ProcessTask& _proc_task; 1586 G1CollectedHeap* _g1h; 1587 G1ConcurrentMark* _cm; 1588 1589 public: 1590 G1CMRefProcTaskProxy(ProcessTask& proc_task, 1591 G1CollectedHeap* g1h, 1592 G1ConcurrentMark* cm) : 1593 AbstractGangTask("Process reference objects in parallel"), 1594 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 1595 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1596 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 1597 } 1598 1599 virtual void work(uint worker_id) { 1600 ResourceMark rm; 1601 HandleMark hm; 1602 G1CMTask* task = _cm->task(worker_id); 1603 G1CMIsAliveClosure g1_is_alive(_g1h); 1604 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 1605 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 1606 1607 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1608 } 1609 }; 1610 1611 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 1612 assert(_workers != NULL, "Need parallel worker threads."); 1613 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1614 1615 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 1616 1617 // We need to reset the concurrency level before each 1618 // proxy task execution, so that the termination protocol 1619 // and overflow handling in G1CMTask::do_marking_step() knows 1620 // how many workers to wait for. 1621 _cm->set_concurrency(_active_workers); 1622 _workers->run_task(&proc_task_proxy); 1623 } 1624 1625 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 1626 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1627 EnqueueTask& _enq_task; 1628 1629 public: 1630 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 1631 AbstractGangTask("Enqueue reference objects in parallel"), 1632 _enq_task(enq_task) { } 1633 1634 virtual void work(uint worker_id) { 1635 _enq_task.work(worker_id); 1636 } 1637 }; 1638 1639 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 1640 assert(_workers != NULL, "Need parallel worker threads."); 1641 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1642 1643 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 1644 1645 // Not strictly necessary but... 1646 // 1647 // We need to reset the concurrency level before each 1648 // proxy task execution, so that the termination protocol 1649 // and overflow handling in G1CMTask::do_marking_step() knows 1650 // how many workers to wait for. 1651 _cm->set_concurrency(_active_workers); 1652 _workers->run_task(&enq_task_proxy); 1653 } 1654 1655 void G1ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 1656 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 1657 } 1658 1659 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 1660 if (has_overflown()) { 1661 // Skip processing the discovered references if we have 1662 // overflown the global marking stack. Reference objects 1663 // only get discovered once so it is OK to not 1664 // de-populate the discovered reference lists. We could have, 1665 // but the only benefit would be that, when marking restarts, 1666 // less reference objects are discovered. 
1667 return; 1668 } 1669 1670 ResourceMark rm; 1671 HandleMark hm; 1672 1673 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1674 1675 // Is alive closure. 1676 G1CMIsAliveClosure g1_is_alive(g1h); 1677 1678 // Inner scope to exclude the cleaning of the string and symbol 1679 // tables from the displayed time. 1680 { 1681 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm); 1682 1683 ReferenceProcessor* rp = g1h->ref_processor_cm(); 1684 1685 // See the comment in G1CollectedHeap::ref_processing_init() 1686 // about how reference processing currently works in G1. 1687 1688 // Set the soft reference policy 1689 rp->setup_policy(clear_all_soft_refs); 1690 assert(_global_mark_stack.is_empty(), "mark stack should be empty"); 1691 1692 // Instances of the 'Keep Alive' and 'Complete GC' closures used 1693 // in serial reference processing. Note these closures are also 1694 // used for serially processing (by the the current thread) the 1695 // JNI references during parallel reference processing. 1696 // 1697 // These closures do not need to synchronize with the worker 1698 // threads involved in parallel reference processing as these 1699 // instances are executed serially by the current thread (e.g. 1700 // reference processing is not multi-threaded and is thus 1701 // performed by the current thread instead of a gang worker). 1702 // 1703 // The gang tasks involved in parallel reference processing create 1704 // their own instances of these closures, which do their own 1705 // synchronization among themselves. 1706 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 1707 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 1708 1709 // We need at least one active thread. If reference processing 1710 // is not multi-threaded we use the current (VMThread) thread, 1711 // otherwise we use the work gang from the G1CollectedHeap and 1712 // we utilize all the worker threads we can. 1713 bool processing_is_mt = rp->processing_is_mt(); 1714 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 1715 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 1716 1717 // Parallel processing task executor. 1718 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 1719 g1h->workers(), active_workers); 1720 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 1721 1722 // Set the concurrency level. The phase was already set prior to 1723 // executing the remark task. 1724 set_concurrency(active_workers); 1725 1726 // Set the degree of MT processing here. If the discovery was done MT, 1727 // the number of threads involved during discovery could differ from 1728 // the number of active workers. This is OK as long as the discovered 1729 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 1730 rp->set_active_mt_degree(active_workers); 1731 1732 // Process the weak references. 1733 const ReferenceProcessorStats& stats = 1734 rp->process_discovered_references(&g1_is_alive, 1735 &g1_keep_alive, 1736 &g1_drain_mark_stack, 1737 executor, 1738 _gc_timer_cm); 1739 _gc_tracer_cm->report_gc_reference_stats(stats); 1740 1741 // The do_oop work routines of the keep_alive and drain_marking_stack 1742 // oop closures will set the has_overflown flag if we overflow the 1743 // global marking stack. 
1744 1745 assert(has_overflown() || _global_mark_stack.is_empty(), 1746 "Mark stack should be empty (unless it has overflown)"); 1747 1748 assert(rp->num_q() == active_workers, "why not"); 1749 1750 rp->enqueue_discovered_references(executor); 1751 1752 rp->verify_no_references_recorded(); 1753 assert(!rp->discovery_enabled(), "Post condition"); 1754 } 1755 1756 if (has_overflown()) { 1757 // We can not trust g1_is_alive if the marking stack overflowed 1758 return; 1759 } 1760 1761 assert(_global_mark_stack.is_empty(), "Marking should have completed"); 1762 1763 // Unload Klasses, String, Symbols, Code Cache, etc. 1764 if (ClassUnloadingWithConcurrentMark) { 1765 bool purged_classes; 1766 1767 { 1768 GCTraceTime(Debug, gc, phases) trace("System Dictionary Unloading", _gc_timer_cm); 1769 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 1770 } 1771 1772 { 1773 GCTraceTime(Debug, gc, phases) trace("Parallel Unloading", _gc_timer_cm); 1774 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 1775 } 1776 } 1777 1778 if (G1StringDedup::is_enabled()) { 1779 GCTraceTime(Debug, gc, phases) trace("String Deduplication Unlink", _gc_timer_cm); 1780 G1StringDedup::unlink(&g1_is_alive); 1781 } 1782 } 1783 1784 void G1ConcurrentMark::swapMarkBitMaps() { 1785 G1CMBitMapRO* temp = _prevMarkBitMap; 1786 _prevMarkBitMap = (G1CMBitMapRO*)_nextMarkBitMap; 1787 _nextMarkBitMap = (G1CMBitMap*) temp; 1788 } 1789 1790 // Closure for marking entries in SATB buffers. 1791 class G1CMSATBBufferClosure : public SATBBufferClosure { 1792 private: 1793 G1CMTask* _task; 1794 G1CollectedHeap* _g1h; 1795 1796 // This is very similar to G1CMTask::deal_with_reference, but with 1797 // more relaxed requirements for the argument, so this must be more 1798 // circumspect about treating the argument as an object. 1799 void do_entry(void* entry) const { 1800 _task->increment_refs_reached(); 1801 HeapRegion* hr = _g1h->heap_region_containing(entry); 1802 if (entry < hr->next_top_at_mark_start()) { 1803 // Until we get here, we don't know whether entry refers to a valid 1804 // object; it could instead have been a stale reference. 
1805       oop obj = static_cast<oop>(entry);
1806       assert(obj->is_oop(true /* ignore mark word */),
1807              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
1808       _task->make_reference_grey(obj);
1809     }
1810   }
1811
1812 public:
1813   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1814     : _task(task), _g1h(g1h) { }
1815
1816   virtual void do_buffer(void** buffer, size_t size) {
1817     for (size_t i = 0; i < size; ++i) {
1818       do_entry(buffer[i]);
1819     }
1820   }
1821 };
1822
1823 class G1RemarkThreadsClosure : public ThreadClosure {
1824   G1CMSATBBufferClosure _cm_satb_cl;
1825   G1CMOopClosure _cm_cl;
1826   MarkingCodeBlobClosure _code_cl;
1827   int _thread_parity;
1828
1829 public:
1830   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1831     _cm_satb_cl(task, g1h),
1832     _cm_cl(g1h, g1h->concurrent_mark(), task),
1833     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1834     _thread_parity(Threads::thread_claim_parity()) {}
1835
1836   void do_thread(Thread* thread) {
1837     if (thread->is_Java_thread()) {
1838       if (thread->claim_oops_do(true, _thread_parity)) {
1839         JavaThread* jt = (JavaThread*)thread;
1840
1841         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking,
1842         // however the oops reachable from nmethods have very complex lifecycles:
1843         // * Alive if on the stack of an executing method
1844         // * Weakly reachable otherwise
1845         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1846         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1847         jt->nmethods_do(&_code_cl);
1848
1849         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1850       }
1851     } else if (thread->is_VM_thread()) {
1852       if (thread->claim_oops_do(true, _thread_parity)) {
1853         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1854       }
1855     }
1856   }
1857 };
1858
1859 class G1CMRemarkTask: public AbstractGangTask {
1860 private:
1861   G1ConcurrentMark* _cm;
1862 public:
1863   void work(uint worker_id) {
1864     // Since all available tasks are actually started, we should
1865     // only proceed if we're supposed to be active.
1866     if (worker_id < _cm->active_tasks()) {
1867       G1CMTask* task = _cm->task(worker_id);
1868       task->record_start_time();
1869       {
1870         ResourceMark rm;
1871         HandleMark hm;
1872
1873         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1874         Threads::threads_do(&threads_f);
1875       }
1876
1877       do {
1878         task->do_marking_step(1000000000.0 /* something very large */,
1879                               true /* do_termination */,
1880                               false /* is_serial */);
1881       } while (task->has_aborted() && !_cm->has_overflown());
1882       // If we overflow, then we do not want to restart. We instead
1883       // want to abort remark and do concurrent marking again.
1884       task->record_end_time();
1885     }
1886   }
1887
1888   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1889     AbstractGangTask("Par Remark"), _cm(cm) {
1890     _cm->terminator()->reset_for_reuse(active_workers);
1891   }
1892 };
1893
1894 void G1ConcurrentMark::checkpointRootsFinalWork() {
1895   ResourceMark rm;
1896   HandleMark hm;
1897   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1898
1899   GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1900
1901   g1h->ensure_parsability(false);
1902
1903   // this is remark, so we'll use up all active threads
1904   uint active_workers = g1h->workers()->active_workers();
1905   set_concurrency_and_phase(active_workers, false /* concurrent */);
1906   // Leave _parallel_marking_threads at its
1907   // value originally calculated in the G1ConcurrentMark
1908   // constructor and pass values of the active workers
1909   // through the gang in the task.
1910
1911   {
1912     StrongRootsScope srs(active_workers);
1913
1914     G1CMRemarkTask remarkTask(this, active_workers);
1915     // We will start all available threads, even if we decide that the
1916     // active_workers will be fewer. The extra ones will just bail out
1917     // immediately.
1918     g1h->workers()->run_task(&remarkTask);
1919   }
1920
1921   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1922   guarantee(has_overflown() ||
1923             satb_mq_set.completed_buffers_num() == 0,
1924             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1925             BOOL_TO_STR(has_overflown()),
1926             satb_mq_set.completed_buffers_num());
1927
1928   print_stats();
1929 }
1930
1931 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
1932   // Note we are overriding the read-only view of the prev map here, via
1933   // the cast.
1934   ((G1CMBitMap*)_prevMarkBitMap)->clear_range(mr);
1935 }
1936
1937 HeapRegion*
1938 G1ConcurrentMark::claim_region(uint worker_id) {
1939   // "checkpoint" the finger
1940   HeapWord* finger = _finger;
1941
1942   // _heap_end will not change underneath our feet; it only changes at
1943   // yield points.
1944   while (finger < _heap_end) {
1945     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1946
1947     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1948     // Make sure that the reads below do not float before loading curr_region.
1949     OrderAccess::loadload();
1950     // Above heap_region_containing may return NULL as we always scan and claim
1951     // until the end of the heap. In this case, just jump to the next region.
1952     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1953
1954     // Is the gap between reading the finger and doing the CAS too long?
1955     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
1956     if (res == finger && curr_region != NULL) {
1957       // we succeeded
1958       HeapWord* bottom = curr_region->bottom();
1959       HeapWord* limit = curr_region->next_top_at_mark_start();
1960
1961       // notice that _finger == end cannot be guaranteed here since
1962       // someone else might have moved the finger even further
1963       assert(_finger >= end, "the finger should have moved forward");
1964
1965       if (limit > bottom) {
1966         return curr_region;
1967       } else {
1968         assert(limit == bottom,
1969                "the region limit should be at bottom");
1970         // we return NULL and the caller should try calling
1971         // claim_region() again.
1972 return NULL; 1973 } 1974 } else { 1975 assert(_finger > finger, "the finger should have moved forward"); 1976 // read it again 1977 finger = _finger; 1978 } 1979 } 1980 1981 return NULL; 1982 } 1983 1984 #ifndef PRODUCT 1985 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 1986 private: 1987 G1CollectedHeap* _g1h; 1988 const char* _phase; 1989 int _info; 1990 1991 public: 1992 VerifyNoCSetOops(const char* phase, int info = -1) : 1993 _g1h(G1CollectedHeap::heap()), 1994 _phase(phase), 1995 _info(info) 1996 { } 1997 1998 void operator()(G1TaskQueueEntry task_entry) const { 1999 if (task_entry.is_array_slice()) { 2000 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 2001 return; 2002 } 2003 guarantee(task_entry.obj()->is_oop(), 2004 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2005 p2i(task_entry.obj()), _phase, _info); 2006 guarantee(!_g1h->is_in_cset(task_entry.obj()), 2007 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2008 p2i(task_entry.obj()), _phase, _info); 2009 } 2010 }; 2011 2012 void G1ConcurrentMark::verify_no_cset_oops() { 2013 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2014 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2015 return; 2016 } 2017 2018 // Verify entries on the global mark stack 2019 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 2020 2021 // Verify entries on the task queues 2022 for (uint i = 0; i < _max_worker_id; ++i) { 2023 G1CMTaskQueue* queue = _task_queues->queue(i); 2024 queue->iterate(VerifyNoCSetOops("Queue", i)); 2025 } 2026 2027 // Verify the global finger 2028 HeapWord* global_finger = finger(); 2029 if (global_finger != NULL && global_finger < _heap_end) { 2030 // Since we always iterate over all regions, we might get a NULL HeapRegion 2031 // here. 2032 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2033 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2034 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2035 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2036 } 2037 2038 // Verify the task fingers 2039 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2040 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2041 G1CMTask* task = _tasks[i]; 2042 HeapWord* task_finger = task->finger(); 2043 if (task_finger != NULL && task_finger < _heap_end) { 2044 // See above note on the global finger verification. 
2045 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2046 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2047 !task_hr->in_collection_set(), 2048 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2049 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2050 } 2051 } 2052 } 2053 #endif // PRODUCT 2054 void G1ConcurrentMark::create_live_data() { 2055 _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap); 2056 } 2057 2058 void G1ConcurrentMark::finalize_live_data() { 2059 _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap); 2060 } 2061 2062 void G1ConcurrentMark::verify_live_data() { 2063 _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap); 2064 } 2065 2066 void G1ConcurrentMark::clear_live_data(WorkGang* workers) { 2067 _g1h->g1_rem_set()->clear_card_live_data(workers); 2068 } 2069 2070 #ifdef ASSERT 2071 void G1ConcurrentMark::verify_live_data_clear() { 2072 _g1h->g1_rem_set()->verify_card_live_data_is_clear(); 2073 } 2074 #endif 2075 2076 void G1ConcurrentMark::print_stats() { 2077 if (!log_is_enabled(Debug, gc, stats)) { 2078 return; 2079 } 2080 log_debug(gc, stats)("---------------------------------------------------------------------"); 2081 for (size_t i = 0; i < _active_tasks; ++i) { 2082 _tasks[i]->print_stats(); 2083 log_debug(gc, stats)("---------------------------------------------------------------------"); 2084 } 2085 } 2086 2087 void G1ConcurrentMark::abort() { 2088 if (!cmThread()->during_cycle() || _has_aborted) { 2089 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2090 return; 2091 } 2092 2093 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2094 // concurrent bitmap clearing. 2095 { 2096 GCTraceTime(Debug, gc)("Clear Next Bitmap"); 2097 clear_bitmap(_nextMarkBitMap, _g1h->workers(), false); 2098 } 2099 // Note we cannot clear the previous marking bitmap here 2100 // since VerifyDuringGC verifies the objects marked during 2101 // a full GC against the previous bitmap. 2102 2103 { 2104 GCTraceTime(Debug, gc)("Clear Live Data"); 2105 clear_live_data(_g1h->workers()); 2106 } 2107 DEBUG_ONLY({ 2108 GCTraceTime(Debug, gc)("Verify Live Data Clear"); 2109 verify_live_data_clear(); 2110 }) 2111 // Empty mark stack 2112 reset_marking_state(); 2113 for (uint i = 0; i < _max_worker_id; ++i) { 2114 _tasks[i]->clear_region_fields(); 2115 } 2116 _first_overflow_barrier_sync.abort(); 2117 _second_overflow_barrier_sync.abort(); 2118 _has_aborted = true; 2119 2120 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2121 satb_mq_set.abandon_partial_marking(); 2122 // This can be called either during or outside marking, we'll read 2123 // the expected_active value from the SATB queue set. 2124 satb_mq_set.set_active_all_threads( 2125 false, /* new active value */ 2126 satb_mq_set.is_active() /* expected_active */); 2127 } 2128 2129 static void print_ms_time_info(const char* prefix, const char* name, 2130 NumberSeq& ns) { 2131 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2132 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2133 if (ns.num() > 0) { 2134 log_trace(gc, marking)("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 2135 prefix, ns.sd(), ns.maximum()); 2136 } 2137 } 2138 2139 void G1ConcurrentMark::print_summary_info() { 2140 Log(gc, marking) log; 2141 if (!log.is_trace()) { 2142 return; 2143 } 2144 2145 log.trace(" Concurrent marking:"); 2146 print_ms_time_info(" ", "init marks", _init_times); 2147 print_ms_time_info(" ", "remarks", _remark_times); 2148 { 2149 print_ms_time_info(" ", "final marks", _remark_mark_times); 2150 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2151 2152 } 2153 print_ms_time_info(" ", "cleanups", _cleanup_times); 2154 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2155 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2156 if (G1ScrubRemSets) { 2157 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2158 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2159 } 2160 log.trace(" Total stop_world time = %8.2f s.", 2161 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2162 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2163 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2164 } 2165 2166 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2167 _parallel_workers->print_worker_threads_on(st); 2168 } 2169 2170 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2171 _parallel_workers->threads_do(tc); 2172 } 2173 2174 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2175 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2176 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2177 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2178 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2179 } 2180 2181 // Closure for iteration over bitmaps 2182 class G1CMBitMapClosure : public BitMapClosure { 2183 private: 2184 // the bitmap that is being iterated over 2185 G1CMBitMap* _nextMarkBitMap; 2186 G1ConcurrentMark* _cm; 2187 G1CMTask* _task; 2188 2189 public: 2190 G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) : 2191 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2192 2193 bool do_bit(size_t offset) { 2194 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2195 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2196 assert( addr < _cm->finger(), "invariant"); 2197 assert(addr >= _task->finger(), "invariant"); 2198 2199 // We move that task's local finger along. 
2200 _task->move_finger_to(addr); 2201 2202 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); 2203 // we only partially drain the local queue and global stack 2204 _task->drain_local_queue(true); 2205 _task->drain_global_stack(true); 2206 2207 // if the has_aborted flag has been raised, we need to bail out of 2208 // the iteration 2209 return !_task->has_aborted(); 2210 } 2211 }; 2212 2213 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2214 ReferenceProcessor* result = g1h->ref_processor_cm(); 2215 assert(result != NULL, "CM reference processor should not be NULL"); 2216 return result; 2217 } 2218 2219 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2220 G1ConcurrentMark* cm, 2221 G1CMTask* task) 2222 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2223 _g1h(g1h), _cm(cm), _task(task) 2224 { } 2225 2226 void G1CMTask::setup_for_region(HeapRegion* hr) { 2227 assert(hr != NULL, 2228 "claim_region() should have filtered out NULL regions"); 2229 _curr_region = hr; 2230 _finger = hr->bottom(); 2231 update_region_limit(); 2232 } 2233 2234 void G1CMTask::update_region_limit() { 2235 HeapRegion* hr = _curr_region; 2236 HeapWord* bottom = hr->bottom(); 2237 HeapWord* limit = hr->next_top_at_mark_start(); 2238 2239 if (limit == bottom) { 2240 // The region was collected underneath our feet. 2241 // We set the finger to bottom to ensure that the bitmap 2242 // iteration that will follow this will not do anything. 2243 // (this is not a condition that holds when we set the region up, 2244 // as the region is not supposed to be empty in the first place) 2245 _finger = bottom; 2246 } else if (limit >= _region_limit) { 2247 assert(limit >= _finger, "peace of mind"); 2248 } else { 2249 assert(limit < _region_limit, "only way to get here"); 2250 // This can happen under some pretty unusual circumstances. An 2251 // evacuation pause empties the region underneath our feet (NTAMS 2252 // at bottom). We then do some allocation in the region (NTAMS 2253 // stays at bottom), followed by the region being used as a GC 2254 // alloc region (NTAMS will move to top() and the objects 2255 // originally below it will be grayed). All objects now marked in 2256 // the region are explicitly grayed, if below the global finger, 2257 // and we do not need in fact to scan anything else. So, we simply 2258 // set _finger to be limit to ensure that the bitmap iteration 2259 // doesn't do anything. 2260 _finger = limit; 2261 } 2262 2263 _region_limit = limit; 2264 } 2265 2266 void G1CMTask::giveup_current_region() { 2267 assert(_curr_region != NULL, "invariant"); 2268 clear_region_fields(); 2269 } 2270 2271 void G1CMTask::clear_region_fields() { 2272 // Values for these three fields that indicate that we're not 2273 // holding on to a region. 
2274   _curr_region   = NULL;
2275   _finger        = NULL;
2276   _region_limit  = NULL;
2277 }
2278
2279 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2280   if (cm_oop_closure == NULL) {
2281     assert(_cm_oop_closure != NULL, "invariant");
2282   } else {
2283     assert(_cm_oop_closure == NULL, "invariant");
2284   }
2285   _cm_oop_closure = cm_oop_closure;
2286 }
2287
2288 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2289   guarantee(nextMarkBitMap != NULL, "invariant");
2290   _nextMarkBitMap                = nextMarkBitMap;
2291   clear_region_fields();
2292
2293   _calls                         = 0;
2294   _elapsed_time_ms               = 0.0;
2295   _termination_time_ms           = 0.0;
2296   _termination_start_time_ms     = 0.0;
2297 }
2298
2299 bool G1CMTask::should_exit_termination() {
2300   regular_clock_call();
2301   // This is called when we are in the termination protocol. We should
2302   // quit if, for some reason, this task wants to abort or the global
2303   // stack is not empty (this means that we can get work from it).
2304   return !_cm->mark_stack_empty() || has_aborted();
2305 }
2306
2307 void G1CMTask::reached_limit() {
2308   assert(_words_scanned >= _words_scanned_limit ||
2309          _refs_reached >= _refs_reached_limit,
2310          "shouldn't have been called otherwise");
2311   regular_clock_call();
2312 }
2313
2314 void G1CMTask::regular_clock_call() {
2315   if (has_aborted()) return;
2316
2317   // First, we need to recalculate the words scanned and refs reached
2318   // limits for the next clock call.
2319   recalculate_limits();
2320
2321   // During the regular clock call we do the following:
2322
2323   // (1) If an overflow has been flagged, then we abort.
2324   if (_cm->has_overflown()) {
2325     set_has_aborted();
2326     return;
2327   }
2328
2329   // If we are not concurrent (i.e. we're doing remark) we don't need
2330   // to check anything else. The other steps are only needed during
2331   // the concurrent marking phase.
2332   if (!concurrent()) return;
2333
2334   // (2) If marking has been aborted for Full GC, then we also abort.
2335   if (_cm->has_aborted()) {
2336     set_has_aborted();
2337     return;
2338   }
2339
2340   double curr_time_ms = os::elapsedVTime() * 1000.0;
2341
2342   // (4) We check whether we should yield. If we have to, then we abort.
2343   if (SuspendibleThreadSet::should_yield()) {
2344     // We should yield. To do this we abort the task. The caller is
2345     // responsible for yielding.
2346     set_has_aborted();
2347     return;
2348   }
2349
2350   // (5) We check whether we've reached our time quota. If we have,
2351   // then we abort.
2352   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2353   if (elapsed_time_ms > _time_target_ms) {
2354     set_has_aborted();
2355     _has_timed_out = true;
2356     return;
2357   }
2358
2359   // (6) Finally, we check whether there are enough completed SATB
2360   // buffers available for processing. If there are, we abort.
2361 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2362 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2363 // we do need to process SATB buffers, we'll abort and restart 2364 // the marking task to do so 2365 set_has_aborted(); 2366 return; 2367 } 2368 } 2369 2370 void G1CMTask::recalculate_limits() { 2371 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2372 _words_scanned_limit = _real_words_scanned_limit; 2373 2374 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2375 _refs_reached_limit = _real_refs_reached_limit; 2376 } 2377 2378 void G1CMTask::decrease_limits() { 2379 // This is called when we believe that we're going to do an infrequent 2380 // operation which will increase the per byte scanned cost (i.e. move 2381 // entries to/from the global stack). It basically tries to decrease the 2382 // scanning limit so that the clock is called earlier. 2383 2384 _words_scanned_limit = _real_words_scanned_limit - 2385 3 * words_scanned_period / 4; 2386 _refs_reached_limit = _real_refs_reached_limit - 2387 3 * refs_reached_period / 4; 2388 } 2389 2390 void G1CMTask::move_entries_to_global_stack() { 2391 // Local array where we'll store the entries that will be popped 2392 // from the local queue. 2393 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2394 2395 size_t n = 0; 2396 G1TaskQueueEntry task_entry; 2397 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) { 2398 buffer[n] = task_entry; 2399 ++n; 2400 } 2401 if (n < G1CMMarkStack::EntriesPerChunk) { 2402 buffer[n] = G1TaskQueueEntry(); 2403 } 2404 2405 if (n > 0) { 2406 if (!_cm->mark_stack_push(buffer)) { 2407 set_has_aborted(); 2408 } 2409 } 2410 2411 // This operation was quite expensive, so decrease the limits. 2412 decrease_limits(); 2413 } 2414 2415 bool G1CMTask::get_entries_from_global_stack() { 2416 // Local array where we'll store the entries that will be popped 2417 // from the global stack. 2418 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2419 2420 if (!_cm->mark_stack_pop(buffer)) { 2421 return false; 2422 } 2423 2424 // We did actually pop at least one entry. 2425 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) { 2426 G1TaskQueueEntry task_entry = buffer[i]; 2427 if (task_entry.is_null()) { 2428 break; 2429 } 2430 assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj())); 2431 bool success = _task_queue->push(task_entry); 2432 // We only call this when the local queue is empty or under a 2433 // given target limit. So, we do not expect this push to fail. 2434 assert(success, "invariant"); 2435 } 2436 2437 // This operation was quite expensive, so decrease the limits 2438 decrease_limits(); 2439 return true; 2440 } 2441 2442 void G1CMTask::drain_local_queue(bool partially) { 2443 if (has_aborted()) { 2444 return; 2445 } 2446 2447 // Decide what the target size is, depending whether we're going to 2448 // drain it partially (so that other tasks can steal if they run out 2449 // of things to do) or totally (at the very end). 
2450   size_t target_size;
2451   if (partially) {
2452     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2453   } else {
2454     target_size = 0;
2455   }
2456
2457   if (_task_queue->size() > target_size) {
2458     G1TaskQueueEntry entry;
2459     bool ret = _task_queue->pop_local(entry);
2460     while (ret) {
2461       scan_task_entry(entry);
2462       if (_task_queue->size() <= target_size || has_aborted()) {
2463         ret = false;
2464       } else {
2465         ret = _task_queue->pop_local(entry);
2466       }
2467     }
2468   }
2469 }
2470
2471 void G1CMTask::drain_global_stack(bool partially) {
2472   if (has_aborted()) return;
2473
2474   // We have a policy to drain the local queue before we attempt to
2475   // drain the global stack.
2476   assert(partially || _task_queue->size() == 0, "invariant");
2477
2478   // Decide what the target size is, depending whether we're going to
2479   // drain it partially (so that other tasks can steal if they run out
2480   // of things to do) or totally (at the very end).
2481   // Notice that when draining the global mark stack partially, due to the raciness
2482   // of the mark stack size update we might in fact drop below the target. But,
2483   // this is not a problem.
2484   // In case of total draining, we simply process until the global mark stack is
2485   // totally empty, disregarding the size counter.
2486   if (partially) {
2487     size_t const target_size = _cm->partial_mark_stack_size_target();
2488     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2489       if (get_entries_from_global_stack()) {
2490         drain_local_queue(partially);
2491       }
2492     }
2493   } else {
2494     while (!has_aborted() && get_entries_from_global_stack()) {
2495       drain_local_queue(partially);
2496     }
2497   }
2498 }
2499
2500 // The SATB queue makes several assumptions about whether to call the par or
2501 // non-par versions of its methods. This is why some of the code is
2502 // replicated. We should really get rid of the single-threaded version
2503 // of the code to simplify things.
2504 void G1CMTask::drain_satb_buffers() {
2505   if (has_aborted()) return;
2506
2507   // We set this so that the regular clock knows that we're in the
2508   // middle of draining buffers and doesn't set the abort flag when it
2509   // notices that SATB buffers are available for draining. It'd be
2510   // very counterproductive if it did that. :-)
2511   _draining_satb_buffers = true;
2512
2513   G1CMSATBBufferClosure satb_cl(this, _g1h);
2514   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2515
2516   // This keeps claiming and applying the closure to completed buffers
2517   // until we run out of buffers or we need to abort.
2518 while (!has_aborted() && 2519 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2520 regular_clock_call(); 2521 } 2522 2523 _draining_satb_buffers = false; 2524 2525 assert(has_aborted() || 2526 concurrent() || 2527 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2528 2529 // again, this was a potentially expensive operation, decrease the 2530 // limits to get the regular clock call early 2531 decrease_limits(); 2532 } 2533 2534 void G1CMTask::print_stats() { 2535 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 2536 _worker_id, _calls); 2537 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2538 _elapsed_time_ms, _termination_time_ms); 2539 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 2540 _step_times_ms.num(), _step_times_ms.avg(), 2541 _step_times_ms.sd()); 2542 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 2543 _step_times_ms.maximum(), _step_times_ms.sum()); 2544 } 2545 2546 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2547 return _task_queues->steal(worker_id, hash_seed, task_entry); 2548 } 2549 2550 /***************************************************************************** 2551 2552 The do_marking_step(time_target_ms, ...) method is the building 2553 block of the parallel marking framework. It can be called in parallel 2554 with other invocations of do_marking_step() on different tasks 2555 (but only one per task, obviously) and concurrently with the 2556 mutator threads, or during remark, hence it eliminates the need 2557 for two versions of the code. When called during remark, it will 2558 pick up from where the task left off during the concurrent marking 2559 phase. Interestingly, tasks are also claimable during evacuation 2560 pauses too, since do_marking_step() ensures that it aborts before 2561 it needs to yield. 2562 2563 The data structures that it uses to do marking work are the 2564 following: 2565 2566 (1) Marking Bitmap. If there are gray objects that appear only 2567 on the bitmap (this happens either when dealing with an overflow 2568 or when the initial marking phase has simply marked the roots 2569 and didn't push them on the stack), then tasks claim heap 2570 regions whose bitmap they then scan to find gray objects. A 2571 global finger indicates where the end of the last claimed region 2572 is. A local finger indicates how far into the region a task has 2573 scanned. The two fingers are used to determine how to gray an 2574 object (i.e. whether simply marking it is OK, as it will be 2575 visited by a task in the future, or whether it needs to be also 2576 pushed on a stack). 2577 2578 (2) Local Queue. The local queue of the task which is accessed 2579 reasonably efficiently by the task. Other tasks can steal from 2580 it when they run out of work. Throughout the marking phase, a 2581 task attempts to keep its local queue short but not totally 2582 empty, so that entries are available for stealing by other 2583 tasks. Only when there is no more work, a task will totally 2584 drain its local queue. 2585 2586 (3) Global Mark Stack. This handles local queue overflow. During 2587 marking only sets of entries are moved between it and the local 2588 queues, as access to it requires a mutex and more fine-grain 2589 interaction with it which might cause contention. If it 2590 overflows, then the marking phase should restart and iterate 2591 over the bitmap to identify gray objects. 
Throughout the marking 2592 phase, tasks attempt to keep the global mark stack at a small 2593 length but not totally empty, so that entries are available for 2594 popping by other tasks. Only when there is no more work, tasks 2595 will totally drain the global mark stack. 2596 2597 (4) SATB Buffer Queue. This is where completed SATB buffers are 2598 made available. Buffers are regularly removed from this queue 2599 and scanned for roots, so that the queue doesn't get too 2600 long. During remark, all completed buffers are processed, as 2601 well as the filled in parts of any uncompleted buffers. 2602 2603 The do_marking_step() method tries to abort when the time target 2604 has been reached. There are a few other cases when the 2605 do_marking_step() method also aborts: 2606 2607 (1) When the marking phase has been aborted (after a Full GC). 2608 2609 (2) When a global overflow (on the global stack) has been 2610 triggered. Before the task aborts, it will actually sync up with 2611 the other tasks to ensure that all the marking data structures 2612 (local queues, stacks, fingers etc.) are re-initialized so that 2613 when do_marking_step() completes, the marking phase can 2614 immediately restart. 2615 2616 (3) When enough completed SATB buffers are available. The 2617 do_marking_step() method only tries to drain SATB buffers right 2618 at the beginning. So, if enough buffers are available, the 2619 marking step aborts and the SATB buffers are processed at 2620 the beginning of the next invocation. 2621 2622 (4) To yield. when we have to yield then we abort and yield 2623 right at the end of do_marking_step(). This saves us from a lot 2624 of hassle as, by yielding we might allow a Full GC. If this 2625 happens then objects will be compacted underneath our feet, the 2626 heap might shrink, etc. We save checking for this by just 2627 aborting and doing the yield right at the end. 2628 2629 From the above it follows that the do_marking_step() method should 2630 be called in a loop (or, otherwise, regularly) until it completes. 2631 2632 If a marking step completes without its has_aborted() flag being 2633 true, it means it has completed the current marking phase (and 2634 also all other marking tasks have done so and have all synced up). 2635 2636 A method called regular_clock_call() is invoked "regularly" (in 2637 sub ms intervals) throughout marking. It is this clock method that 2638 checks all the abort conditions which were mentioned above and 2639 decides when the task should abort. A work-based scheme is used to 2640 trigger this clock method: when the number of object words the 2641 marking phase has scanned or the number of references the marking 2642 phase has visited reach a given limit. Additional invocations to 2643 the method clock have been planted in a few other strategic places 2644 too. The initial reason for the clock method was to avoid calling 2645 vtime too regularly, as it is quite expensive. So, once it was in 2646 place, it was natural to piggy-back all the other conditions on it 2647 too and not constantly check them throughout the code. 2648 2649 If do_termination is true then do_marking_step will enter its 2650 termination protocol. 2651 2652 The value of is_serial must be true when do_marking_step is being 2653 called serially (i.e. by the VMThread) and do_marking_step should 2654 skip any synchronization in the termination and overflow code. 2655 Examples include the serial remark code and the serial reference 2656 processing closures. 
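
    As a purely illustrative sketch (mirroring the serial callers earlier in
    this file, e.g. G1CMDrainMarkingStackClosure), such a serial caller drives
    the step in a loop along the following lines:

      do {
        task->do_marking_step(1000000000.0,  // something very large
                              true,          // do_termination
                              true);         // is_serial
      } while (task->has_aborted() && !cm->has_overflown());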
2657 2658 The value of is_serial must be false when do_marking_step is 2659 being called by any of the worker threads in a work gang. 2660 Examples include the concurrent marking code (CMMarkingTask), 2661 the MT remark code, and the MT reference processing closures. 2662 2663 *****************************************************************************/ 2664 2665 void G1CMTask::do_marking_step(double time_target_ms, 2666 bool do_termination, 2667 bool is_serial) { 2668 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2669 assert(concurrent() == _cm->concurrent(), "they should be the same"); 2670 2671 G1Policy* g1_policy = _g1h->g1_policy(); 2672 assert(_task_queues != NULL, "invariant"); 2673 assert(_task_queue != NULL, "invariant"); 2674 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant"); 2675 2676 assert(!_claimed, 2677 "only one thread should claim this task at any one time"); 2678 2679 // OK, this doesn't safeguard again all possible scenarios, as it is 2680 // possible for two threads to set the _claimed flag at the same 2681 // time. But it is only for debugging purposes anyway and it will 2682 // catch most problems. 2683 _claimed = true; 2684 2685 _start_time_ms = os::elapsedVTime() * 1000.0; 2686 2687 // If do_stealing is true then do_marking_step will attempt to 2688 // steal work from the other G1CMTasks. It only makes sense to 2689 // enable stealing when the termination protocol is enabled 2690 // and do_marking_step() is not being called serially. 2691 bool do_stealing = do_termination && !is_serial; 2692 2693 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2694 _time_target_ms = time_target_ms - diff_prediction_ms; 2695 2696 // set up the variables that are used in the work-based scheme to 2697 // call the regular clock method 2698 _words_scanned = 0; 2699 _refs_reached = 0; 2700 recalculate_limits(); 2701 2702 // clear all flags 2703 clear_has_aborted(); 2704 _has_timed_out = false; 2705 _draining_satb_buffers = false; 2706 2707 ++_calls; 2708 2709 // Set up the bitmap and oop closures. Anything that uses them is 2710 // eventually called from this method, so it is OK to allocate these 2711 // statically. 2712 G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 2713 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 2714 set_cm_oop_closure(&cm_oop_closure); 2715 2716 if (_cm->has_overflown()) { 2717 // This can happen if the mark stack overflows during a GC pause 2718 // and this task, after a yield point, restarts. We have to abort 2719 // as we need to get into the overflow protocol which happens 2720 // right at the end of this task. 2721 set_has_aborted(); 2722 } 2723 2724 // First drain any available SATB buffers. After this, we will not 2725 // look at SATB buffers before the next invocation of this method. 2726 // If enough completed SATB buffers are queued up, the regular clock 2727 // will abort this task so that it restarts. 2728 drain_satb_buffers(); 2729 // ...then partially drain the local queue and the global stack 2730 drain_local_queue(true); 2731 drain_global_stack(true); 2732 2733 do { 2734 if (!has_aborted() && _curr_region != NULL) { 2735 // This means that we're already holding on to a region. 2736 assert(_finger != NULL, "if region is not NULL, then the finger " 2737 "should not be NULL either"); 2738 2739 // We might have restarted this task after an evacuation pause 2740 // which might have evacuated the region we're holding on to 2741 // underneath our feet. 
Let's read its limit again to make sure 2742 // that we do not iterate over a region of the heap that 2743 // contains garbage (update_region_limit() will also move 2744 // _finger to the start of the region if it is found empty). 2745 update_region_limit(); 2746 // We will start from _finger not from the start of the region, 2747 // as we might be restarting this task after aborting half-way 2748 // through scanning this region. In this case, _finger points to 2749 // the address where we last found a marked object. If this is a 2750 // fresh region, _finger points to start(). 2751 MemRegion mr = MemRegion(_finger, _region_limit); 2752 2753 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2754 "humongous regions should go around loop once only"); 2755 2756 // Some special cases: 2757 // If the memory region is empty, we can just give up the region. 2758 // If the current region is humongous then we only need to check 2759 // the bitmap for the bit associated with the start of the object, 2760 // scan the object if it's live, and give up the region. 2761 // Otherwise, let's iterate over the bitmap of the part of the region 2762 // that is left. 2763 // If the iteration is successful, give up the region. 2764 if (mr.is_empty()) { 2765 giveup_current_region(); 2766 regular_clock_call(); 2767 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2768 if (_nextMarkBitMap->isMarked(mr.start())) { 2769 // The object is marked - apply the closure 2770 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 2771 bitmap_closure.do_bit(offset); 2772 } 2773 // Even if this task aborted while scanning the humongous object 2774 // we can (and should) give up the current region. 2775 giveup_current_region(); 2776 regular_clock_call(); 2777 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 2778 giveup_current_region(); 2779 regular_clock_call(); 2780 } else { 2781 assert(has_aborted(), "currently the only way to do so"); 2782 // The only way to abort the bitmap iteration is to return 2783 // false from the do_bit() method. However, inside the 2784 // do_bit() method we move the _finger to point to the 2785 // object currently being looked at. So, if we bail out, we 2786 // have definitely set _finger to something non-null. 2787 assert(_finger != NULL, "invariant"); 2788 2789 // Region iteration was actually aborted. So now _finger 2790 // points to the address of the object we last scanned. If we 2791 // leave it there, when we restart this task, we will rescan 2792 // the object. It is easy to avoid this. We move the finger by 2793 // enough to point to the next possible object header (the 2794 // bitmap knows by how much we need to move it as it knows its 2795 // granularity). 2796 assert(_finger < _region_limit, "invariant"); 2797 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 2798 // Check if bitmap iteration was aborted while scanning the last object 2799 if (new_finger >= _region_limit) { 2800 giveup_current_region(); 2801 } else { 2802 move_finger_to(new_finger); 2803 } 2804 } 2805 } 2806 // At this point we have either completed iterating over the 2807 // region we were holding on to, or we have aborted. 2808 2809 // We then partially drain the local queue and the global stack. 2810 // (Do we really need this?) 
2811 drain_local_queue(true); 2812 drain_global_stack(true); 2813 2814 // Read the note on the claim_region() method on why it might 2815 // return NULL with potentially more regions available for 2816 // claiming and why we have to check out_of_regions() to determine 2817 // whether we're done or not. 2818 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2819 // We are going to try to claim a new region. We should have 2820 // given up on the previous one. 2821 // Separated the asserts so that we know which one fires. 2822 assert(_curr_region == NULL, "invariant"); 2823 assert(_finger == NULL, "invariant"); 2824 assert(_region_limit == NULL, "invariant"); 2825 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2826 if (claimed_region != NULL) { 2827 // Yes, we managed to claim one 2828 setup_for_region(claimed_region); 2829 assert(_curr_region == claimed_region, "invariant"); 2830 } 2831 // It is important to call the regular clock here. It might take 2832 // a while to claim a region if, for example, we hit a large 2833 // block of empty regions. So we need to call the regular clock 2834 // method once round the loop to make sure it's called 2835 // frequently enough. 2836 regular_clock_call(); 2837 } 2838 2839 if (!has_aborted() && _curr_region == NULL) { 2840 assert(_cm->out_of_regions(), 2841 "at this point we should be out of regions"); 2842 } 2843 } while ( _curr_region != NULL && !has_aborted()); 2844 2845 if (!has_aborted()) { 2846 // We cannot check whether the global stack is empty, since other 2847 // tasks might be pushing objects to it concurrently. 2848 assert(_cm->out_of_regions(), 2849 "at this point we should be out of regions"); 2850 // Try to reduce the number of available SATB buffers so that 2851 // remark has less work to do. 2852 drain_satb_buffers(); 2853 } 2854 2855 // Since we've done everything else, we can now totally drain the 2856 // local queue and global stack. 2857 drain_local_queue(false); 2858 drain_global_stack(false); 2859 2860 // Attempt at work stealing from other task's queues. 2861 if (do_stealing && !has_aborted()) { 2862 // We have not aborted. This means that we have finished all that 2863 // we could. Let's try to do some stealing... 2864 2865 // We cannot check whether the global stack is empty, since other 2866 // tasks might be pushing objects to it concurrently. 2867 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2868 "only way to reach here"); 2869 while (!has_aborted()) { 2870 G1TaskQueueEntry entry; 2871 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2872 scan_task_entry(entry); 2873 2874 // And since we're towards the end, let's totally drain the 2875 // local queue and global stack. 2876 drain_local_queue(false); 2877 drain_global_stack(false); 2878 } else { 2879 break; 2880 } 2881 } 2882 } 2883 2884 // We still haven't aborted. Now, let's try to get into the 2885 // termination protocol. 2886 if (do_termination && !has_aborted()) { 2887 // We cannot check whether the global stack is empty, since other 2888 // tasks might be concurrently pushing objects on it. 2889 // Separated the asserts so that we know which one fires. 
2890 assert(_cm->out_of_regions(), "only way to reach here"); 2891 assert(_task_queue->size() == 0, "only way to reach here"); 2892 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2893 2894 // The G1CMTask class also extends the TerminatorTerminator class, 2895 // hence its should_exit_termination() method will also decide 2896 // whether to exit the termination protocol or not. 2897 bool finished = (is_serial || 2898 _cm->terminator()->offer_termination(this)); 2899 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2900 _termination_time_ms += 2901 termination_end_time_ms - _termination_start_time_ms; 2902 2903 if (finished) { 2904 // We're all done. 2905 2906 if (_worker_id == 0) { 2907 // let's allow task 0 to do this 2908 if (concurrent()) { 2909 assert(_cm->concurrent_marking_in_progress(), "invariant"); 2910 // we need to set this to false before the next 2911 // safepoint. This way we ensure that the marking phase 2912 // doesn't observe any more heap expansions. 2913 _cm->clear_concurrent_marking_in_progress(); 2914 } 2915 } 2916 2917 // We can now guarantee that the global stack is empty, since 2918 // all other tasks have finished. We separated the guarantees so 2919 // that, if a condition is false, we can immediately find out 2920 // which one. 2921 guarantee(_cm->out_of_regions(), "only way to reach here"); 2922 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2923 guarantee(_task_queue->size() == 0, "only way to reach here"); 2924 guarantee(!_cm->has_overflown(), "only way to reach here"); 2925 } else { 2926 // Apparently there's more work to do. Let's abort this task. It 2927 // will restart it and we can hopefully find more things to do. 2928 set_has_aborted(); 2929 } 2930 } 2931 2932 // Mainly for debugging purposes to make sure that a pointer to the 2933 // closure which was statically allocated in this frame doesn't 2934 // escape it by accident. 2935 set_cm_oop_closure(NULL); 2936 double end_time_ms = os::elapsedVTime() * 1000.0; 2937 double elapsed_time_ms = end_time_ms - _start_time_ms; 2938 // Update the step history. 2939 _step_times_ms.add(elapsed_time_ms); 2940 2941 if (has_aborted()) { 2942 // The task was aborted for some reason. 2943 if (_has_timed_out) { 2944 double diff_ms = elapsed_time_ms - _time_target_ms; 2945 // Keep statistics of how well we did with respect to hitting 2946 // our target only if we actually timed out (if we aborted for 2947 // other reasons, then the results might get skewed). 2948 _marking_step_diffs_ms.add(diff_ms); 2949 } 2950 2951 if (_cm->has_overflown()) { 2952 // This is the interesting one. We aborted because a global 2953 // overflow was raised. This means we have to restart the 2954 // marking phase and start iterating over regions. However, in 2955 // order to do this we have to make sure that all tasks stop 2956 // what they are doing and re-initialize in a safe manner. We 2957 // will achieve this with the use of two barrier sync points. 2958 2959 if (!is_serial) { 2960 // We only need to enter the sync barrier if being called 2961 // from a parallel context 2962 _cm->enter_first_sync_barrier(_worker_id); 2963 2964 // When we exit this sync barrier we know that all tasks have 2965 // stopped doing marking work. So, it's now safe to 2966 // re-initialize our data structures. At the end of this method, 2967 // task 0 will clear the global data structures. 2968 } 2969 2970 // We clear the local state of this task... 
2971 clear_region_fields(); 2972 2973 if (!is_serial) { 2974 // ...and enter the second barrier. 2975 _cm->enter_second_sync_barrier(_worker_id); 2976 } 2977 // At this point, if we're during the concurrent phase of 2978 // marking, everything has been re-initialized and we're 2979 // ready to restart. 2980 } 2981 } 2982 2983 _claimed = false; 2984 } 2985 2986 G1CMTask::G1CMTask(uint worker_id, 2987 G1ConcurrentMark* cm, 2988 G1CMTaskQueue* task_queue, 2989 G1CMTaskQueueSet* task_queues) 2990 : _g1h(G1CollectedHeap::heap()), 2991 _worker_id(worker_id), _cm(cm), 2992 _objArray_processor(this), 2993 _claimed(false), 2994 _nextMarkBitMap(NULL), _hash_seed(17), 2995 _task_queue(task_queue), 2996 _task_queues(task_queues), 2997 _cm_oop_closure(NULL) { 2998 guarantee(task_queue != NULL, "invariant"); 2999 guarantee(task_queues != NULL, "invariant"); 3000 3001 _marking_step_diffs_ms.add(0.5); 3002 } 3003 3004 // These are formatting macros that are used below to ensure 3005 // consistent formatting. The *_H_* versions are used to format the 3006 // header for a particular value and they should be kept consistent 3007 // with the corresponding macro. Also note that most of the macros add 3008 // the necessary white space (as a prefix) which makes them a bit 3009 // easier to compose. 3010 3011 // All the output lines are prefixed with this string to be able to 3012 // identify them easily in a large log file. 3013 #define G1PPRL_LINE_PREFIX "###" 3014 3015 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 3016 #ifdef _LP64 3017 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 3018 #else // _LP64 3019 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 3020 #endif // _LP64 3021 3022 // For per-region info 3023 #define G1PPRL_TYPE_FORMAT " %-4s" 3024 #define G1PPRL_TYPE_H_FORMAT " %4s" 3025 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 3026 #define G1PPRL_BYTE_H_FORMAT " %9s" 3027 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 3028 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 3029 3030 // For summary info 3031 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 3032 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 3033 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 3034 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 3035 3036 G1PrintRegionLivenessInfoClosure:: 3037 G1PrintRegionLivenessInfoClosure(const char* phase_name) 3038 : _total_used_bytes(0), _total_capacity_bytes(0), 3039 _total_prev_live_bytes(0), _total_next_live_bytes(0), 3040 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 3041 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 3042 MemRegion g1_reserved = g1h->g1_reserved(); 3043 double now = os::elapsedTime(); 3044 3045 // Print the header of the output. 
3046 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 3047 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 3048 G1PPRL_SUM_ADDR_FORMAT("reserved") 3049 G1PPRL_SUM_BYTE_FORMAT("region-size"), 3050 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 3051 HeapRegion::GrainBytes); 3052 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3053 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3054 G1PPRL_TYPE_H_FORMAT 3055 G1PPRL_ADDR_BASE_H_FORMAT 3056 G1PPRL_BYTE_H_FORMAT 3057 G1PPRL_BYTE_H_FORMAT 3058 G1PPRL_BYTE_H_FORMAT 3059 G1PPRL_DOUBLE_H_FORMAT 3060 G1PPRL_BYTE_H_FORMAT 3061 G1PPRL_BYTE_H_FORMAT, 3062 "type", "address-range", 3063 "used", "prev-live", "next-live", "gc-eff", 3064 "remset", "code-roots"); 3065 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3066 G1PPRL_TYPE_H_FORMAT 3067 G1PPRL_ADDR_BASE_H_FORMAT 3068 G1PPRL_BYTE_H_FORMAT 3069 G1PPRL_BYTE_H_FORMAT 3070 G1PPRL_BYTE_H_FORMAT 3071 G1PPRL_DOUBLE_H_FORMAT 3072 G1PPRL_BYTE_H_FORMAT 3073 G1PPRL_BYTE_H_FORMAT, 3074 "", "", 3075 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 3076 "(bytes)", "(bytes)"); 3077 } 3078 3079 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 3080 const char* type = r->get_type_str(); 3081 HeapWord* bottom = r->bottom(); 3082 HeapWord* end = r->end(); 3083 size_t capacity_bytes = r->capacity(); 3084 size_t used_bytes = r->used(); 3085 size_t prev_live_bytes = r->live_bytes(); 3086 size_t next_live_bytes = r->next_live_bytes(); 3087 double gc_eff = r->gc_efficiency(); 3088 size_t remset_bytes = r->rem_set()->mem_size(); 3089 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 3090 3091 _total_used_bytes += used_bytes; 3092 _total_capacity_bytes += capacity_bytes; 3093 _total_prev_live_bytes += prev_live_bytes; 3094 _total_next_live_bytes += next_live_bytes; 3095 _total_remset_bytes += remset_bytes; 3096 _total_strong_code_roots_bytes += strong_code_roots_bytes; 3097 3098 // Print a line for this particular region. 3099 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3100 G1PPRL_TYPE_FORMAT 3101 G1PPRL_ADDR_BASE_FORMAT 3102 G1PPRL_BYTE_FORMAT 3103 G1PPRL_BYTE_FORMAT 3104 G1PPRL_BYTE_FORMAT 3105 G1PPRL_DOUBLE_FORMAT 3106 G1PPRL_BYTE_FORMAT 3107 G1PPRL_BYTE_FORMAT, 3108 type, p2i(bottom), p2i(end), 3109 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 3110 remset_bytes, strong_code_roots_bytes); 3111 3112 return false; 3113 } 3114 3115 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 3116 // add static memory usages to remembered set sizes 3117 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3118 // Print the footer of the output. 3119 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3120 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3121 " SUMMARY" 3122 G1PPRL_SUM_MB_FORMAT("capacity") 3123 G1PPRL_SUM_MB_PERC_FORMAT("used") 3124 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3125 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3126 G1PPRL_SUM_MB_FORMAT("remset") 3127 G1PPRL_SUM_MB_FORMAT("code-roots"), 3128 bytes_to_mb(_total_capacity_bytes), 3129 bytes_to_mb(_total_used_bytes), 3130 perc(_total_used_bytes, _total_capacity_bytes), 3131 bytes_to_mb(_total_prev_live_bytes), 3132 perc(_total_prev_live_bytes, _total_capacity_bytes), 3133 bytes_to_mb(_total_next_live_bytes), 3134 perc(_total_next_live_bytes, _total_capacity_bytes), 3135 bytes_to_mb(_total_remset_bytes), 3136 bytes_to_mb(_total_strong_code_roots_bytes)); 3137 }
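
// Illustrative sketch only (the phase name below is just an example; the
// actual call sites live in the marking/cleanup logging code): the closure
// above is driven by iterating over all heap regions, and its destructor
// prints the summary footer.
//
//   if (log_is_enabled(Trace, gc, liveness)) {
//     G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
//     G1CollectedHeap::heap()->heap_region_iterate(&cl);
//   }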