1 /* 2 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/metadataOnStackMark.hpp" 27 #include "classfile/symbolTable.hpp" 28 #include "code/codeCache.hpp" 29 #include "gc/g1/concurrentMarkThread.inline.hpp" 30 #include "gc/g1/g1CollectedHeap.inline.hpp" 31 #include "gc/g1/g1CollectorState.hpp" 32 #include "gc/g1/g1ConcurrentMark.inline.hpp" 33 #include "gc/g1/g1HeapVerifier.hpp" 34 #include "gc/g1/g1OopClosures.inline.hpp" 35 #include "gc/g1/g1CardLiveData.inline.hpp" 36 #include "gc/g1/g1Policy.hpp" 37 #include "gc/g1/g1StringDedup.hpp" 38 #include "gc/g1/heapRegion.inline.hpp" 39 #include "gc/g1/heapRegionRemSet.hpp" 40 #include "gc/g1/heapRegionSet.inline.hpp" 41 #include "gc/g1/suspendibleThreadSet.hpp" 42 #include "gc/shared/gcId.hpp" 43 #include "gc/shared/gcTimer.hpp" 44 #include "gc/shared/gcTrace.hpp" 45 #include "gc/shared/gcTraceTime.inline.hpp" 46 #include "gc/shared/genOopClosures.inline.hpp" 47 #include "gc/shared/referencePolicy.hpp" 48 #include "gc/shared/strongRootsScope.hpp" 49 #include "gc/shared/taskqueue.inline.hpp" 50 #include "gc/shared/vmGCOperations.hpp" 51 #include "logging/log.hpp" 52 #include "memory/allocation.hpp" 53 #include "memory/resourceArea.hpp" 54 #include "oops/oop.inline.hpp" 55 #include "runtime/atomic.hpp" 56 #include "runtime/handles.inline.hpp" 57 #include "runtime/java.hpp" 58 #include "runtime/prefetch.inline.hpp" 59 #include "services/memTracker.hpp" 60 #include "utilities/growableArray.hpp" 61 62 // Concurrent marking bit map wrapper 63 64 G1CMBitMapRO::G1CMBitMapRO(int shifter) : 65 _bm(), 66 _shifter(shifter) { 67 _bmStartWord = 0; 68 _bmWordSize = 0; 69 } 70 71 HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr, 72 const HeapWord* limit) const { 73 // First we must round addr *up* to a possible object boundary. 
74 addr = (HeapWord*)align_size_up((intptr_t)addr, 75 HeapWordSize << _shifter); 76 size_t addrOffset = heapWordToOffset(addr); 77 assert(limit != NULL, "limit must not be NULL"); 78 size_t limitOffset = heapWordToOffset(limit); 79 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 80 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 81 assert(nextAddr >= addr, "get_next_one postcondition"); 82 assert(nextAddr == limit || isMarked(nextAddr), 83 "get_next_one postcondition"); 84 return nextAddr; 85 } 86 87 #ifndef PRODUCT 88 bool G1CMBitMapRO::covers(MemRegion heap_rs) const { 89 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 90 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize, 91 "size inconsistency"); 92 return _bmStartWord == (HeapWord*)(heap_rs.start()) && 93 _bmWordSize == heap_rs.word_size(); 94 } 95 #endif 96 97 void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const { 98 _bm.print_on_error(st, prefix); 99 } 100 101 size_t G1CMBitMap::compute_size(size_t heap_size) { 102 return ReservedSpace::allocation_align_size_up(heap_size / mark_distance()); 103 } 104 105 size_t G1CMBitMap::mark_distance() { 106 return MinObjAlignmentInBytes * BitsPerByte; 107 } 108 109 void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) { 110 _bmStartWord = heap.start(); 111 _bmWordSize = heap.word_size(); 112 113 _bm = BitMapView((BitMap::bm_word_t*) storage->reserved().start(), _bmWordSize >> _shifter); 114 115 storage->set_mapping_changed_listener(&_listener); 116 } 117 118 void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) { 119 if (zero_filled) { 120 return; 121 } 122 // We need to clear the bitmap on commit, removing any existing information. 
123 MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords); 124 _bm->clear_range(mr); 125 } 126 127 void G1CMBitMap::clear_range(MemRegion mr) { 128 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 129 assert(!mr.is_empty(), "unexpected empty region"); 130 // convert address range into offset range 131 _bm.at_put_range(heapWordToOffset(mr.start()), 132 heapWordToOffset(mr.end()), false); 133 } 134 135 G1CMMarkStack::G1CMMarkStack() : 136 _max_chunk_capacity(0), 137 _base(NULL), 138 _chunk_capacity(0), 139 _should_expand(false) { 140 set_empty(); 141 } 142 143 bool G1CMMarkStack::resize(size_t new_capacity) { 144 assert(is_empty(), "Only resize when stack is empty."); 145 assert(new_capacity <= _max_chunk_capacity, 146 "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity); 147 148 OopChunk* new_base = MmapArrayAllocator<OopChunk, mtGC>::allocate_or_null(new_capacity); 149 150 if (new_base == NULL) { 151 log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(OopChunk)); 152 return false; 153 } 154 // Release old mapping. 
155 if (_base != NULL) { 156 MmapArrayAllocator<OopChunk, mtGC>::free(_base, _chunk_capacity); 157 } 158 159 _base = new_base; 160 _chunk_capacity = new_capacity; 161 set_empty(); 162 _should_expand = false; 163 164 return true; 165 } 166 167 size_t G1CMMarkStack::capacity_alignment() { 168 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(OopChunk)) / sizeof(void*); 169 } 170 171 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) { 172 guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized."); 173 174 size_t const OopChunkSizeInVoidStar = sizeof(OopChunk) / sizeof(void*); 175 176 _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / OopChunkSizeInVoidStar; 177 size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / OopChunkSizeInVoidStar; 178 179 guarantee(initial_chunk_capacity <= _max_chunk_capacity, 180 "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT, 181 _max_chunk_capacity, 182 initial_chunk_capacity); 183 184 log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT, 185 initial_chunk_capacity, _max_chunk_capacity); 186 187 return resize(initial_chunk_capacity); 188 } 189 190 void G1CMMarkStack::expand() { 191 // Clear expansion flag 192 _should_expand = false; 193 194 if (_chunk_capacity == _max_chunk_capacity) { 195 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity); 196 return; 197 } 198 size_t old_capacity = _chunk_capacity; 199 // Double capacity if possible 200 size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity); 201 202 if (resize(new_capacity)) { 203 log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 204 old_capacity, new_capacity); 205 } else { 206 log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " 
SIZE_FORMAT " chunks", 207 old_capacity, new_capacity); 208 } 209 } 210 211 G1CMMarkStack::~G1CMMarkStack() { 212 if (_base != NULL) { 213 MmapArrayAllocator<OopChunk, mtGC>::free(_base, _chunk_capacity); 214 } 215 } 216 217 void G1CMMarkStack::add_chunk_to_list(OopChunk* volatile* list, OopChunk* elem) { 218 elem->next = *list; 219 *list = elem; 220 } 221 222 void G1CMMarkStack::add_chunk_to_chunk_list(OopChunk* elem) { 223 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 224 add_chunk_to_list(&_chunk_list, elem); 225 _chunks_in_chunk_list++; 226 } 227 228 void G1CMMarkStack::add_chunk_to_free_list(OopChunk* elem) { 229 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 230 add_chunk_to_list(&_free_list, elem); 231 } 232 233 G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_list(OopChunk* volatile* list) { 234 OopChunk* result = *list; 235 if (result != NULL) { 236 *list = (*list)->next; 237 } 238 return result; 239 } 240 241 G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_chunk_list() { 242 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 243 OopChunk* result = remove_chunk_from_list(&_chunk_list); 244 if (result != NULL) { 245 _chunks_in_chunk_list--; 246 } 247 return result; 248 } 249 250 G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_free_list() { 251 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 252 return remove_chunk_from_list(&_free_list); 253 } 254 255 G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() { 256 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code. 257 // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding 258 // wraparound of _hwm. 
259 if (_hwm >= _chunk_capacity) { 260 return NULL; 261 } 262 263 size_t cur_idx = Atomic::add(1, &_hwm) - 1; 264 if (cur_idx >= _chunk_capacity) { 265 return NULL; 266 } 267 268 OopChunk* result = ::new (&_base[cur_idx]) OopChunk; 269 result->next = NULL; 270 return result; 271 } 272 273 bool G1CMMarkStack::par_push_chunk(oop* ptr_arr) { 274 // Get a new chunk. 275 OopChunk* new_chunk = remove_chunk_from_free_list(); 276 277 if (new_chunk == NULL) { 278 // Did not get a chunk from the free list. Allocate from backing memory. 279 new_chunk = allocate_new_chunk(); 280 281 if (new_chunk == NULL) { 282 return false; 283 } 284 } 285 286 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, OopsPerChunk * sizeof(oop)); 287 288 add_chunk_to_chunk_list(new_chunk); 289 290 return true; 291 } 292 293 bool G1CMMarkStack::par_pop_chunk(oop* ptr_arr) { 294 OopChunk* cur = remove_chunk_from_chunk_list(); 295 296 if (cur == NULL) { 297 return false; 298 } 299 300 Copy::conjoint_memory_atomic(cur->data, ptr_arr, OopsPerChunk * sizeof(oop)); 301 302 add_chunk_to_free_list(cur); 303 return true; 304 } 305 306 void G1CMMarkStack::set_empty() { 307 _chunks_in_chunk_list = 0; 308 _hwm = 0; 309 _chunk_list = NULL; 310 _free_list = NULL; 311 } 312 313 G1CMRootRegions::G1CMRootRegions() : 314 _cm(NULL), _scan_in_progress(false), 315 _should_abort(false), _claimed_survivor_index(0) { } 316 317 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) { 318 _survivors = survivors; 319 _cm = cm; 320 } 321 322 void G1CMRootRegions::prepare_for_scan() { 323 assert(!scan_in_progress(), "pre-condition"); 324 325 // Currently, only survivors can be root regions. 
326 _claimed_survivor_index = 0; 327 _scan_in_progress = _survivors->regions()->is_nonempty(); 328 _should_abort = false; 329 } 330 331 HeapRegion* G1CMRootRegions::claim_next() { 332 if (_should_abort) { 333 // If someone has set the should_abort flag, we return NULL to 334 // force the caller to bail out of their loop. 335 return NULL; 336 } 337 338 // Currently, only survivors can be root regions. 339 const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions(); 340 341 int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1; 342 if (claimed_index < survivor_regions->length()) { 343 return survivor_regions->at(claimed_index); 344 } 345 return NULL; 346 } 347 348 uint G1CMRootRegions::num_root_regions() const { 349 return (uint)_survivors->regions()->length(); 350 } 351 352 void G1CMRootRegions::notify_scan_done() { 353 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 354 _scan_in_progress = false; 355 RootRegionScan_lock->notify_all(); 356 } 357 358 void G1CMRootRegions::cancel_scan() { 359 notify_scan_done(); 360 } 361 362 void G1CMRootRegions::scan_finished() { 363 assert(scan_in_progress(), "pre-condition"); 364 365 // Currently, only survivors can be root regions. 
366 if (!_should_abort) { 367 assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index); 368 assert((uint)_claimed_survivor_index >= _survivors->length(), 369 "we should have claimed all survivors, claimed index = %u, length = %u", 370 (uint)_claimed_survivor_index, _survivors->length()); 371 } 372 373 notify_scan_done(); 374 } 375 376 bool G1CMRootRegions::wait_until_scan_finished() { 377 if (!scan_in_progress()) return false; 378 379 { 380 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 381 while (scan_in_progress()) { 382 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 383 } 384 } 385 return true; 386 } 387 388 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) { 389 return MAX2((n_par_threads + 2) / 4, 1U); 390 } 391 392 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) : 393 _g1h(g1h), 394 _markBitMap1(), 395 _markBitMap2(), 396 _parallel_marking_threads(0), 397 _max_parallel_marking_threads(0), 398 _sleep_factor(0.0), 399 _marking_task_overhead(1.0), 400 _cleanup_list("Cleanup List"), 401 402 _prevMarkBitMap(&_markBitMap1), 403 _nextMarkBitMap(&_markBitMap2), 404 405 _global_mark_stack(), 406 // _finger set in set_non_marking_state 407 408 _max_worker_id(ParallelGCThreads), 409 // _active_tasks set in set_non_marking_state 410 // _tasks set inside the constructor 411 _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)), 412 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)), 413 414 _has_overflown(false), 415 _concurrent(false), 416 _has_aborted(false), 417 _restart_for_overflow(false), 418 _concurrent_marking_in_progress(false), 419 _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()), 420 _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()), 421 422 // _verbose_level set below 423 424 _init_times(), 425 _remark_times(), 
_remark_mark_times(), _remark_weak_ref_times(), 426 _cleanup_times(), 427 _total_counting_time(0.0), 428 _total_rs_scrub_time(0.0), 429 430 _parallel_workers(NULL), 431 432 _completed_initialization(false) { 433 434 _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage); 435 _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage); 436 437 // Create & start a ConcurrentMark thread. 438 _cmThread = new ConcurrentMarkThread(this); 439 assert(cmThread() != NULL, "CM Thread should have been created"); 440 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 441 if (_cmThread->osthread() == NULL) { 442 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread"); 443 } 444 445 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 446 assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency"); 447 assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency"); 448 449 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 450 satb_qs.set_buffer_size(G1SATBBufferSize); 451 452 _root_regions.init(_g1h->survivor(), this); 453 454 if (ConcGCThreads > ParallelGCThreads) { 455 log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).", 456 ConcGCThreads, ParallelGCThreads); 457 return; 458 } 459 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) { 460 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent 461 // if both are set 462 _sleep_factor = 0.0; 463 _marking_task_overhead = 1.0; 464 } else if (G1MarkingOverheadPercent > 0) { 465 // We will calculate the number of parallel marking threads based 466 // on a target overhead with respect to the soft real-time goal 467 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 468 double overall_cm_overhead = 469 (double) MaxGCPauseMillis * marking_overhead / 470 (double) GCPauseIntervalMillis; 471 double cpu_ratio = 1.0 / os::initial_active_processor_count(); 472 
double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 473 double marking_task_overhead = 474 overall_cm_overhead / marking_thread_num * os::initial_active_processor_count(); 475 double sleep_factor = 476 (1.0 - marking_task_overhead) / marking_task_overhead; 477 478 FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num); 479 _sleep_factor = sleep_factor; 480 _marking_task_overhead = marking_task_overhead; 481 } else { 482 // Calculate the number of parallel marking threads by scaling 483 // the number of parallel GC threads. 484 uint marking_thread_num = scale_parallel_threads(ParallelGCThreads); 485 FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num); 486 _sleep_factor = 0.0; 487 _marking_task_overhead = 1.0; 488 } 489 490 assert(ConcGCThreads > 0, "Should have been set"); 491 log_debug(gc)("ConcGCThreads: %u", ConcGCThreads); 492 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads); 493 _parallel_marking_threads = ConcGCThreads; 494 _max_parallel_marking_threads = _parallel_marking_threads; 495 496 _parallel_workers = new WorkGang("G1 Marker", 497 _max_parallel_marking_threads, false, true); 498 if (_parallel_workers == NULL) { 499 vm_exit_during_initialization("Failed necessary allocation."); 500 } else { 501 _parallel_workers->initialize_workers(); 502 } 503 504 if (FLAG_IS_DEFAULT(MarkStackSize)) { 505 size_t mark_stack_size = 506 MIN2(MarkStackSizeMax, 507 MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE))); 508 // Verify that the calculated value for MarkStackSize is in range. 509 // It would be nice to use the private utility routine from Arguments. 
510 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { 511 log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): " 512 "must be between 1 and " SIZE_FORMAT, 513 mark_stack_size, MarkStackSizeMax); 514 return; 515 } 516 FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size); 517 } else { 518 // Verify MarkStackSize is in range. 519 if (FLAG_IS_CMDLINE(MarkStackSize)) { 520 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { 521 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 522 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): " 523 "must be between 1 and " SIZE_FORMAT, 524 MarkStackSize, MarkStackSizeMax); 525 return; 526 } 527 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) { 528 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 529 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")" 530 " or for MarkStackSizeMax (" SIZE_FORMAT ")", 531 MarkStackSize, MarkStackSizeMax); 532 return; 533 } 534 } 535 } 536 } 537 538 if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) { 539 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack."); 540 } 541 542 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC); 543 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); 544 545 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 546 _active_tasks = _max_worker_id; 547 548 for (uint i = 0; i < _max_worker_id; ++i) { 549 G1CMTaskQueue* task_queue = new G1CMTaskQueue(); 550 task_queue->initialize(); 551 _task_queues->register_queue(i, task_queue); 552 553 _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues); 554 555 _accum_task_vtime[i] = 0.0; 556 } 557 558 // so that the call below can read a sensible value 559 _heap_start = g1h->reserved_region().start(); 560 set_non_marking_state(); 561 _completed_initialization = true; 562 } 563 564 void G1ConcurrentMark::reset() { 
565 // Starting values for these two. This should be called in a STW 566 // phase. 567 MemRegion reserved = _g1h->g1_reserved(); 568 _heap_start = reserved.start(); 569 _heap_end = reserved.end(); 570 571 // Separated the asserts so that we know which one fires. 572 assert(_heap_start != NULL, "heap bounds should look ok"); 573 assert(_heap_end != NULL, "heap bounds should look ok"); 574 assert(_heap_start < _heap_end, "heap bounds should look ok"); 575 576 // Reset all the marking data structures and any necessary flags 577 reset_marking_state(); 578 579 // We do reset all of them, since different phases will use 580 // different number of active threads. So, it's easiest to have all 581 // of them ready. 582 for (uint i = 0; i < _max_worker_id; ++i) { 583 _tasks[i]->reset(_nextMarkBitMap); 584 } 585 586 // we need this to make sure that the flag is on during the evac 587 // pause with initial mark piggy-backed 588 set_concurrent_marking_in_progress(); 589 } 590 591 592 void G1ConcurrentMark::reset_marking_state() { 593 _global_mark_stack.set_should_expand(has_overflown()); 594 _global_mark_stack.set_empty(); 595 clear_has_overflown(); 596 _finger = _heap_start; 597 598 for (uint i = 0; i < _max_worker_id; ++i) { 599 G1CMTaskQueue* queue = _task_queues->queue(i); 600 queue->set_empty(); 601 } 602 } 603 604 void G1ConcurrentMark::set_concurrency(uint active_tasks) { 605 assert(active_tasks <= _max_worker_id, "we should not have more"); 606 607 _active_tasks = active_tasks; 608 // Need to update the three data structures below according to the 609 // number of active threads for this phase. 
610 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 611 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 612 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 613 } 614 615 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) { 616 set_concurrency(active_tasks); 617 618 _concurrent = concurrent; 619 // We propagate this to all tasks, not just the active ones. 620 for (uint i = 0; i < _max_worker_id; ++i) 621 _tasks[i]->set_concurrent(concurrent); 622 623 if (concurrent) { 624 set_concurrent_marking_in_progress(); 625 } else { 626 // We currently assume that the concurrent flag has been set to 627 // false before we start remark. At this point we should also be 628 // in a STW phase. 629 assert(!concurrent_marking_in_progress(), "invariant"); 630 assert(out_of_regions(), 631 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT, 632 p2i(_finger), p2i(_heap_end)); 633 } 634 } 635 636 void G1ConcurrentMark::set_non_marking_state() { 637 // We set the global marking state to some default values when we're 638 // not doing marking. 639 reset_marking_state(); 640 _active_tasks = 0; 641 clear_concurrent_marking_in_progress(); 642 } 643 644 G1ConcurrentMark::~G1ConcurrentMark() { 645 // The G1ConcurrentMark instance is never freed. 646 ShouldNotReachHere(); 647 } 648 649 class G1ClearBitMapTask : public AbstractGangTask { 650 public: 651 static size_t chunk_size() { return M; } 652 653 private: 654 // Heap region closure used for clearing the given mark bitmap. 
655 class G1ClearBitmapHRClosure : public HeapRegionClosure { 656 private: 657 G1CMBitMap* _bitmap; 658 G1ConcurrentMark* _cm; 659 public: 660 G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) { 661 } 662 663 virtual bool doHeapRegion(HeapRegion* r) { 664 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize; 665 666 HeapWord* cur = r->bottom(); 667 HeapWord* const end = r->end(); 668 669 while (cur < end) { 670 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end)); 671 _bitmap->clear_range(mr); 672 673 cur += chunk_size_in_words; 674 675 // Abort iteration if after yielding the marking has been aborted. 676 if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) { 677 return true; 678 } 679 // Repeat the asserts from before the start of the closure. We will do them 680 // as asserts here to minimize their overhead on the product. However, we 681 // will have them as guarantees at the beginning / end of the bitmap 682 // clearing to get some checking in the product. 683 assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant"); 684 assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant"); 685 } 686 assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index()); 687 688 return false; 689 } 690 }; 691 692 G1ClearBitmapHRClosure _cl; 693 HeapRegionClaimer _hr_claimer; 694 bool _suspendible; // If the task is suspendible, workers must join the STS. 695 696 public: 697 G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) : 698 AbstractGangTask("G1 Clear Bitmap"), 699 _cl(bitmap, suspendible ? 
cm : NULL), 700 _hr_claimer(n_workers), 701 _suspendible(suspendible) 702 { } 703 704 void work(uint worker_id) { 705 SuspendibleThreadSetJoiner sts_join(_suspendible); 706 G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer, true); 707 } 708 709 bool is_complete() { 710 return _cl.complete(); 711 } 712 }; 713 714 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) { 715 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint."); 716 717 size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor(); 718 size_t const num_chunks = align_size_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size(); 719 720 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers()); 721 722 G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield); 723 724 log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks); 725 workers->run_task(&cl, num_workers); 726 guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding."); 727 } 728 729 void G1ConcurrentMark::cleanup_for_next_mark() { 730 // Make sure that the concurrent mark thread looks to still be in 731 // the current cycle. 732 guarantee(cmThread()->during_cycle(), "invariant"); 733 734 // We are finishing up the current cycle by clearing the next 735 // marking bitmap and getting it ready for the next cycle. During 736 // this time no other cycle can start. So, let's make sure that this 737 // is the case. 738 guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant"); 739 740 clear_bitmap(_nextMarkBitMap, _parallel_workers, true); 741 742 // Clear the live count data. If the marking has been aborted, the abort() 743 // call already did that. 
744 if (!has_aborted()) { 745 clear_live_data(_parallel_workers); 746 DEBUG_ONLY(verify_live_data_clear()); 747 } 748 749 // Repeat the asserts from above. 750 guarantee(cmThread()->during_cycle(), "invariant"); 751 guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant"); 752 } 753 754 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) { 755 assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint."); 756 clear_bitmap((G1CMBitMap*)_prevMarkBitMap, workers, false); 757 } 758 759 class CheckBitmapClearHRClosure : public HeapRegionClosure { 760 G1CMBitMap* _bitmap; 761 bool _error; 762 public: 763 CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) { 764 } 765 766 virtual bool doHeapRegion(HeapRegion* r) { 767 // This closure can be called concurrently to the mutator, so we must make sure 768 // that the result of the getNextMarkedWordAddress() call is compared to the 769 // value passed to it as limit to detect any found bits. 770 // end never changes in G1. 771 HeapWord* end = r->end(); 772 return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end; 773 } 774 }; 775 776 bool G1ConcurrentMark::nextMarkBitmapIsClear() { 777 CheckBitmapClearHRClosure cl(_nextMarkBitMap); 778 _g1h->heap_region_iterate(&cl); 779 return cl.complete(); 780 } 781 782 class NoteStartOfMarkHRClosure: public HeapRegionClosure { 783 public: 784 bool doHeapRegion(HeapRegion* r) { 785 r->note_start_of_marking(); 786 return false; 787 } 788 }; 789 790 void G1ConcurrentMark::checkpointRootsInitialPre() { 791 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 792 G1Policy* g1p = g1h->g1_policy(); 793 794 _has_aborted = false; 795 796 // Initialize marking structures. This has to be done in a STW phase. 797 reset(); 798 799 // For each region note start of marking. 
800 NoteStartOfMarkHRClosure startcl; 801 g1h->heap_region_iterate(&startcl); 802 } 803 804 805 void G1ConcurrentMark::checkpointRootsInitialPost() { 806 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 807 808 // Start Concurrent Marking weak-reference discovery. 809 ReferenceProcessor* rp = g1h->ref_processor_cm(); 810 // enable ("weak") refs discovery 811 rp->enable_discovery(); 812 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 813 814 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 815 // This is the start of the marking cycle, we're expected all 816 // threads to have SATB queues with active set to false. 817 satb_mq_set.set_active_all_threads(true, /* new active value */ 818 false /* expected_active */); 819 820 _root_regions.prepare_for_scan(); 821 822 // update_g1_committed() will be called at the end of an evac pause 823 // when marking is on. So, it's also called at the end of the 824 // initial-mark pause to update the heap end, if the heap expands 825 // during it. No need to call it here. 826 } 827 828 /* 829 * Notice that in the next two methods, we actually leave the STS 830 * during the barrier sync and join it immediately afterwards. If we 831 * do not do this, the following deadlock can occur: one thread could 832 * be in the barrier sync code, waiting for the other thread to also 833 * sync up, whereas another one could be trying to yield, while also 834 * waiting for the other threads to sync up too. 835 * 836 * Note, however, that this code is also used during remark and in 837 * this case we should not attempt to leave / enter the STS, otherwise 838 * we'll either hit an assert (debug / fastdebug) or deadlock 839 * (product). So we should only leave / enter the STS if we are 840 * operating concurrently. 841 * 842 * Because the thread that does the sync barrier has left the STS, it 843 * is possible to be suspended for a Full GC or an evacuation pause 844 * could occur. 
This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

// First rendezvous after a mark-stack overflow: all marking tasks sync
// up here before the marking state is reset (by worker 0).
void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

// Second rendezvous after a mark-stack overflow: ensures the global
// reset (done between the two barriers) is complete before any task
// proceeds.
void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

// Gang task driving the concurrent phase of marking: each worker joins
// the suspendible thread set and repeatedly invokes do_marking_step()
// for its G1CMTask until marking completes or is aborted.
class G1CMConcurrentMarkingTask: public AbstractGangTask {
private:
  G1ConcurrentMark*     _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      G1CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check();

          // If only this task aborted (e.g. hit its time target) while
          // marking as a whole continues, back off proportionally to the
          // time just spent before retrying.
          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
                            ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~G1CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint G1ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
                                                      1, /* Minimum workers */
                                                      parallel_marking_threads(),
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
         "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
         max_parallel_marking_threads(), n_conc_workers);
  return n_conc_workers;
}

// Marks everything reachable from the objects in a single root region
// by iterating the region's objects from bottom to top.
void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

// Gang task for the concurrent root-region scan; workers repeatedly
// claim root regions until none remain.
class G1CMRootRegionScanTask : public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;

public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr);
      hr = root_regions->claim_next();
    }
  }
};

// Runs the concurrent root-region scan, if there is anything to scan.
void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
                                     // We distribute work on a per-region basis, so starting
                                     // more threads than that is useless.
                                     root_regions()->num_root_regions());
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
    _parallel_workers->run_task(&task, _parallel_marking_threads);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

// Registers the start of a concurrent mark cycle with the GC timer and
// tracer, and records the pre-GC heap summary.
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

// Registers the end of a concurrent mark cycle, reporting a concurrent
// mode failure if the cycle was aborted.
void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

// Runs the main concurrent marking phase using the parallel worker gang.
void G1ConcurrentMark::mark_from_roots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _parallel_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// The remark pause: finishes marking work (checkpointRootsFinalWork),
// processes weak references, and either completes the marking cycle or
// flags a restart if the global mark stack overflowed.
void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Remark Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_global_mark_stack.should_expand()) {
    _global_mark_stack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  _gc_tracer_cm->report_object_count_after_gc(&is_alive);
}

// Region closure used at the end of marking: frees regions found to be
// completely garbage and collects remembered-set cleanup work for the
// rest. Archive regions are left untouched.
class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const uint old_regions_removed() { return _old_regions_removed; }
  const uint humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    // A used region with no marked live data and outside the young
    // generation is completely garbage and can be freed immediately.
    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

// Gang task that applies G1NoteEndOfConcMarkClosure to all regions in
// parallel and merges each worker's reclaimed regions into the global
// cleanup list.
class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
      AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

// The cleanup pause: finalizes and verifies liveness data, swaps the
// marking bitmaps, frees wholly-garbage regions, and updates policy and
// monitoring state.
void G1ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  g1h->verifier()->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Cleanup Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  {
    GCTraceTime(Debug, gc)("Finalize Live Data");
    finalize_live_data();
  }

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc)("Verify Live Data");
    verify_live_data();
  }

  g1h->collector_state()->set_mark_in_progress(false);

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  uint n_workers = _g1h->workers()->active_workers();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
  g1h->workers()->run_task(&g1_par_note_end_task);
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    g1h->scrub_rem_set();
    _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end();

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
  }

  g1h->verifier()->check_bitmaps("Cleanup End");

  g1h->verifier()->verify_region_sets_optional();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();
  g1h->allocation_context_stats().update_after_mark();
}

// Concurrently clears and returns the regions reclaimed during cleanup
// to the secondary free list, in batches to limit notify_all() traffic.
void G1ConcurrentMark::complete_cleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                  "cleanup list has %u entries",
                                  _cleanup_list.length());

  // No one else should be accessing the _cleanup_list at this point,
  // so it is not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                      "appending %u entries to the secondary_free_list, "
                                      "cleanup list still has %u entries",
                                      tmp_free_list.length(),
                                      _cleanup_list.length());

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }
#ifndef PRODUCT
      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
#endif
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference
// processing. Uses the G1CMTask associated with a worker thread (for
// serial reference processing the G1CMTask for worker 0 is used) to
// preserve (mark) and trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// state. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.
1463 1464 class G1CMKeepAliveAndDrainClosure: public OopClosure { 1465 G1ConcurrentMark* _cm; 1466 G1CMTask* _task; 1467 int _ref_counter_limit; 1468 int _ref_counter; 1469 bool _is_serial; 1470 public: 1471 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) : 1472 _cm(cm), _task(task), _is_serial(is_serial), 1473 _ref_counter_limit(G1RefProcDrainInterval) { 1474 assert(_ref_counter_limit > 0, "sanity"); 1475 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1476 _ref_counter = _ref_counter_limit; 1477 } 1478 1479 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1480 virtual void do_oop( oop* p) { do_oop_work(p); } 1481 1482 template <class T> void do_oop_work(T* p) { 1483 if (!_cm->has_overflown()) { 1484 oop obj = oopDesc::load_decode_heap_oop(p); 1485 _task->deal_with_reference(obj); 1486 _ref_counter--; 1487 1488 if (_ref_counter == 0) { 1489 // We have dealt with _ref_counter_limit references, pushing them 1490 // and objects reachable from them on to the local stack (and 1491 // possibly the global stack). Call G1CMTask::do_marking_step() to 1492 // process these entries. 1493 // 1494 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if 1495 // there's nothing more to do (i.e. we're done with the entries that 1496 // were pushed as a result of the G1CMTask::deal_with_reference() calls 1497 // above) or we overflow. 1498 // 1499 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted() 1500 // flag while there may still be some work to do. (See the comment at 1501 // the beginning of G1CMTask::do_marking_step() for those conditions - 1502 // one of which is reaching the specified time target.) It is only 1503 // when G1CMTask::do_marking_step() returns without setting the 1504 // has_aborted() flag that the marking step has completed. 
1505 do { 1506 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1507 _task->do_marking_step(mark_step_duration_ms, 1508 false /* do_termination */, 1509 _is_serial); 1510 } while (_task->has_aborted() && !_cm->has_overflown()); 1511 _ref_counter = _ref_counter_limit; 1512 } 1513 } 1514 } 1515 }; 1516 1517 // 'Drain' oop closure used by both serial and parallel reference processing. 1518 // Uses the G1CMTask associated with a given worker thread (for serial 1519 // reference processing the G1CMtask for worker 0 is used). Calls the 1520 // do_marking_step routine, with an unbelievably large timeout value, 1521 // to drain the marking data structures of the remaining entries 1522 // added by the 'keep alive' oop closure above. 1523 1524 class G1CMDrainMarkingStackClosure: public VoidClosure { 1525 G1ConcurrentMark* _cm; 1526 G1CMTask* _task; 1527 bool _is_serial; 1528 public: 1529 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) : 1530 _cm(cm), _task(task), _is_serial(is_serial) { 1531 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1532 } 1533 1534 void do_void() { 1535 do { 1536 // We call G1CMTask::do_marking_step() to completely drain the local 1537 // and global marking stacks of entries pushed by the 'keep alive' 1538 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 1539 // 1540 // G1CMTask::do_marking_step() is called in a loop, which we'll exit 1541 // if there's nothing more to do (i.e. we've completely drained the 1542 // entries that were pushed as a a result of applying the 'keep alive' 1543 // closure to the entries on the discovered ref lists) or we overflow 1544 // the global marking stack. 1545 // 1546 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted() 1547 // flag while there may still be some work to do. 
(See the comment at 1548 // the beginning of G1CMTask::do_marking_step() for those conditions - 1549 // one of which is reaching the specified time target.) It is only 1550 // when G1CMTask::do_marking_step() returns without setting the 1551 // has_aborted() flag that the marking step has completed. 1552 1553 _task->do_marking_step(1000000000.0 /* something very large */, 1554 true /* do_termination */, 1555 _is_serial); 1556 } while (_task->has_aborted() && !_cm->has_overflown()); 1557 } 1558 }; 1559 1560 // Implementation of AbstractRefProcTaskExecutor for parallel 1561 // reference processing at the end of G1 concurrent marking 1562 1563 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 1564 private: 1565 G1CollectedHeap* _g1h; 1566 G1ConcurrentMark* _cm; 1567 WorkGang* _workers; 1568 uint _active_workers; 1569 1570 public: 1571 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 1572 G1ConcurrentMark* cm, 1573 WorkGang* workers, 1574 uint n_workers) : 1575 _g1h(g1h), _cm(cm), 1576 _workers(workers), _active_workers(n_workers) { } 1577 1578 // Executes the given task using concurrent marking worker threads. 
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

// Gang task wrapping a reference ProcessTask: each worker runs the task
// with per-worker keep-alive and drain closures.
class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask& _proc_task;
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       G1ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    ResourceMark rm;
    HandleMark hm;
    G1CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&proc_task_proxy);
}

// Gang task wrapping a reference EnqueueTask so it can be run by the
// worker gang.
class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&enq_task_proxy);
}

// Parallel cleaning (code cache, string/symbol tables, class unloading)
// driven from weakRefsWork().
void G1ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
  G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
}

// Processes discovered (weak) references at remark and then unloads
// classes and cleans string/symbol tables.
void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  if (has_overflown()) {
    // Skip processing the discovered references if we have
    // overflown the global marking stack. Reference objects
    // only get discovered once so it is OK to not
    // de-populate the discovered reference lists. We could have,
    // but the only benefit would be that, when marking restarts,
    // less reference objects are discovered.
    return;
  }

  ResourceMark rm;
  HandleMark   hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
1676 G1CMIsAliveClosure g1_is_alive(g1h); 1677 1678 // Inner scope to exclude the cleaning of the string and symbol 1679 // tables from the displayed time. 1680 { 1681 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm); 1682 1683 ReferenceProcessor* rp = g1h->ref_processor_cm(); 1684 1685 // See the comment in G1CollectedHeap::ref_processing_init() 1686 // about how reference processing currently works in G1. 1687 1688 // Set the soft reference policy 1689 rp->setup_policy(clear_all_soft_refs); 1690 assert(_global_mark_stack.is_empty(), "mark stack should be empty"); 1691 1692 // Instances of the 'Keep Alive' and 'Complete GC' closures used 1693 // in serial reference processing. Note these closures are also 1694 // used for serially processing (by the the current thread) the 1695 // JNI references during parallel reference processing. 1696 // 1697 // These closures do not need to synchronize with the worker 1698 // threads involved in parallel reference processing as these 1699 // instances are executed serially by the current thread (e.g. 1700 // reference processing is not multi-threaded and is thus 1701 // performed by the current thread instead of a gang worker). 1702 // 1703 // The gang tasks involved in parallel reference processing create 1704 // their own instances of these closures, which do their own 1705 // synchronization among themselves. 1706 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 1707 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 1708 1709 // We need at least one active thread. If reference processing 1710 // is not multi-threaded we use the current (VMThread) thread, 1711 // otherwise we use the work gang from the G1CollectedHeap and 1712 // we utilize all the worker threads we can. 1713 bool processing_is_mt = rp->processing_is_mt(); 1714 uint active_workers = (processing_is_mt ? 
g1h->workers()->active_workers() : 1U); 1715 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 1716 1717 // Parallel processing task executor. 1718 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 1719 g1h->workers(), active_workers); 1720 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 1721 1722 // Set the concurrency level. The phase was already set prior to 1723 // executing the remark task. 1724 set_concurrency(active_workers); 1725 1726 // Set the degree of MT processing here. If the discovery was done MT, 1727 // the number of threads involved during discovery could differ from 1728 // the number of active workers. This is OK as long as the discovered 1729 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 1730 rp->set_active_mt_degree(active_workers); 1731 1732 // Process the weak references. 1733 const ReferenceProcessorStats& stats = 1734 rp->process_discovered_references(&g1_is_alive, 1735 &g1_keep_alive, 1736 &g1_drain_mark_stack, 1737 executor, 1738 _gc_timer_cm); 1739 _gc_tracer_cm->report_gc_reference_stats(stats); 1740 1741 // The do_oop work routines of the keep_alive and drain_marking_stack 1742 // oop closures will set the has_overflown flag if we overflow the 1743 // global marking stack. 1744 1745 assert(has_overflown() || _global_mark_stack.is_empty(), 1746 "Mark stack should be empty (unless it is out of memory)"); 1747 1748 assert(rp->num_q() == active_workers, "why not"); 1749 1750 rp->enqueue_discovered_references(executor); 1751 1752 rp->verify_no_references_recorded(); 1753 assert(!rp->discovery_enabled(), "Post condition"); 1754 } 1755 1756 if (has_overflown()) { 1757 // We can not trust g1_is_alive if the marking stack overflowed 1758 return; 1759 } 1760 1761 assert(_global_mark_stack.is_empty(), "Marking should have completed"); 1762 1763 // Unload Klasses, String, Symbols, Code Cache, etc. 
  if (ClassUnloadingWithConcurrentMark) {
    bool purged_classes;

    {
      GCTraceTime(Debug, gc, phases) trace("System Dictionary Unloading", _gc_timer_cm);
      purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
    }

    {
      GCTraceTime(Debug, gc, phases) trace("Parallel Unloading", _gc_timer_cm);
      weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
    }
  }

  if (G1StringDedup::is_enabled()) {
    GCTraceTime(Debug, gc, phases) trace("String Deduplication Unlink", _gc_timer_cm);
    G1StringDedup::unlink(&g1_is_alive);
  }
}

// Swap the previous and next marking bitmap pointers. The casts move the
// read-only (G1CMBitMapRO) view between the two roles.
void G1ConcurrentMark::swapMarkBitMaps() {
  G1CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap = (G1CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap = (G1CMBitMap*) temp;
}

// Closure for marking entries in SATB buffers.
class G1CMSATBBufferClosure : public SATBBufferClosure {
private:
  G1CMTask* _task;
  G1CollectedHeap* _g1h;

  // This is very similar to G1CMTask::deal_with_reference, but with
  // more relaxed requirements for the argument, so this must be more
  // circumspect about treating the argument as an object.
  void do_entry(void* entry) const {
    _task->increment_refs_reached();
    HeapRegion* hr = _g1h->heap_region_containing(entry);
    if (entry < hr->next_top_at_mark_start()) {
      // Until we get here, we don't know whether entry refers to a valid
      // object; it could instead have been a stale reference.
      oop obj = static_cast<oop>(entry);
      assert(obj->is_oop(true /* ignore mark word */),
             "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
      _task->make_reference_grey(obj);
    }
  }

public:
  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  // Apply do_entry() to every entry in a completed SATB buffer.
  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};

// Visits each thread during remark and processes its marking-relevant
// roots: nmethods on Java thread stacks and pending SATB buffer entries.
class G1RemarkThreadsClosure : public ThreadClosure {
  G1CMSATBBufferClosure _cm_satb_cl;
  G1CMOopClosure _cm_cl;
  MarkingCodeBlobClosure _code_cl;
  int _thread_parity;

 public:
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
    _cm_satb_cl(task, g1h),
    _cm_cl(g1h, g1h->concurrent_mark(), task),
    _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
    _thread_parity(Threads::thread_claim_parity()) {}

  void do_thread(Thread* thread) {
    if (thread->is_Java_thread()) {
      if (thread->claim_oops_do(true, _thread_parity)) {
        JavaThread* jt = (JavaThread*)thread;

        // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking
        // however the liveness of oops reachable from nmethods has very complex lifecycles:
        // * Alive if on the stack of an executing method
        // * Weakly reachable otherwise
        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
        // live by the SATB invariant but other oops recorded in nmethods may behave differently.
        jt->nmethods_do(&_code_cl);

        jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
      }
    } else if (thread->is_VM_thread()) {
      if (thread->claim_oops_do(true, _thread_parity)) {
        // Non-Java threads share a single SATB queue; only the VM thread
        // (claimed once per parity) drains it.
        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
      }
    }
  }
};

// Gang task running the parallel remark step: each worker first scans
// thread roots, then finishes marking via do_marking_step().
class G1CMRemarkTask: public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if (worker_id < _cm->active_tasks()) {
      G1CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      {
        ResourceMark rm;
        HandleMark hm;

        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
        Threads::threads_do(&threads_f);
      }

      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_termination */,
                              false /* is_serial */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  // Reset the parallel task terminator for the number of workers that
  // will actually participate in this remark.
  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    AbstractGangTask("Par Remark"), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

// STW remark: finish marking by running G1CMRemarkTask on all active
// workers, then verify that (absent overflow) no SATB buffers remain.
void G1ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);

  g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at its
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the gang in the task.

  {
    StrongRootsScope srs(active_workers);

    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    g1h->workers()->run_task(&remarkTask);
  }

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}

void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((G1CMBitMap*)_prevMarkBitMap)->clear_range(mr);
}

// Claim the next region to scan for the given worker by CAS'ing the global
// finger forward. Returns the claimed region, or NULL either when the
// claimed region has nothing to scan (caller should retry) or when the
// finger has reached the end of the heap.
HeapRegion*
G1ConcurrentMark::claim_region(uint worker_id) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    HeapRegion* curr_region = _g1h->heap_region_containing(finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return NULL as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger && curr_region != NULL) {
      // we succeeded
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = curr_region->next_top_at_mark_start();

      // notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (limit > bottom) {
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

#ifndef PRODUCT
// Debug-only functor asserting that a marking entry is a valid object (or
// an array-slice marker) and is not in the collection set.
class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
private:
  G1CollectedHeap* _g1h;
  const char* _phase;
  int _info;

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(oop obj) const {
    guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(obj), _phase, _info);
    guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_in_cset(obj),
              "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
              p2i(obj), _phase, _info);
  }
};

// Debug-only verification (at a safepoint, while marking is in progress)
// that no marking data structure holds a collection-set oop: checks the
// global mark stack, the per-task queues, and the global and task fingers.
void G1ConcurrentMark::verify_no_cset_oops() {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != NULL && global_finger < _heap_end) {
    // Since we always iterate over all regions, we might get a NULL HeapRegion
    // here.
    HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
    guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(parallel_marking_threads() <= _max_worker_id, "sanity");
  for (uint i = 0; i < parallel_marking_threads(); ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _heap_end) {
      // See above note on the global finger verification.
      HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
      guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
                !task_hr->in_collection_set(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
    }
  }
}
#endif // PRODUCT
// Thin delegations to G1RemSet's card live data management.
void G1ConcurrentMark::create_live_data() {
  _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap);
}

void G1ConcurrentMark::finalize_live_data() {
  _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap);
}

void G1ConcurrentMark::verify_live_data() {
  _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap);
}

void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
  _g1h->g1_rem_set()->clear_card_live_data(workers);
}

#ifdef ASSERT
void G1ConcurrentMark::verify_live_data_clear() {
  _g1h->g1_rem_set()->verify_card_live_data_is_clear();
}
#endif

// Print per-task marking statistics (only when gc+stats=debug logging is on).
void G1ConcurrentMark::print_stats() {
  if (!log_is_enabled(Debug, gc, stats)) {
    return;
  }
  log_debug(gc, stats)("---------------------------------------------------------------------");
  for (size_t i = 0; i < _active_tasks; ++i) {
    _tasks[i]->print_stats();
    log_debug(gc, stats)("---------------------------------------------------------------------");
  }
}

// Abort the in-progress concurrent cycle (e.g. because a Full GC took over):
// clears the next bitmap and live data, empties marking state, releases the
// overflow barriers, and deactivates SATB queues.
void G1ConcurrentMark::abort() {
  if (!cmThread()->during_cycle() || _has_aborted) {
    // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
    return;
  }

  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
  // concurrent bitmap clearing.
  {
    GCTraceTime(Debug, gc)("Clear Next Bitmap");
    clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
  }
  // Note we cannot clear the previous marking bitmap here
  // since VerifyDuringGC verifies the objects marked during
  // a full GC against the previous bitmap.

  {
    GCTraceTime(Debug, gc)("Clear Live Data");
    clear_live_data(_g1h->workers());
  }
  DEBUG_ONLY({
    GCTraceTime(Debug, gc)("Verify Live Data Clear");
    verify_live_data_clear();
  })
  // Empty mark stack
  reset_marking_state();
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->clear_region_fields();
  }
  // Release any workers parked on the overflow rendezvous barriers.
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);
}

// Helper for print_summary_info(): logs count/total/average (and, when
// non-empty, standard deviation and maximum) of a timing sequence.
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

// Log a trace-level summary of accumulated marking phase times.
void G1ConcurrentMark::print_summary_info() {
  Log(gc, marking) log;
  if (!log.is_trace()) {
    return;
  }

  log.trace(" Concurrent marking:");
  print_ms_time_info(" ", "init marks", _init_times);
  print_ms_time_info(" ", "remarks", _remark_times);
  {
    print_ms_time_info(" ", "final marks", _remark_mark_times);
    print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info(" ", "cleanups", _cleanup_times);
  log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
            _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
  if (G1ScrubRemSets) {
    log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
              _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
  }
  log.trace(" Total stop_world time = %8.2f s.",
            (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
  log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
            cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
}

void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}

void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
  _parallel_workers->threads_do(tc);
}

void G1ConcurrentMark::print_on_error(outputStream* st) const {
  st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
               p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
  _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
  _nextMarkBitMap->print_on_error(st, " Next Bits: ");
}

// Closure for iteration over bitmaps
class G1CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  G1CMBitMap* _nextMarkBitMap;
  G1ConcurrentMark* _cm;
  G1CMTask* _task;

public:
  G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
    _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  // Scan the object whose mark is at 'offset', partially draining the
  // local queue and global stack along the way. Returns false (stopping
  // the bitmap iteration) once the task has aborted.
  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert( addr < _cm->finger(), "invariant");
    assert(addr >= _task->finger(), "invariant");

    // We move that task's local finger along.
    _task->move_finger_to(addr);

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
  ReferenceProcessor* result = g1h->ref_processor_cm();
  assert(result != NULL, "CM reference processor should not be NULL");
  return result;
}

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1ConcurrentMark* cm,
                               G1CMTask* task)
  : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _cm(cm), _task(task)
{ }

// Begin scanning a freshly claimed region: position the local finger at
// its bottom and compute the scan limit.
void G1CMTask::setup_for_region(HeapRegion* hr) {
  assert(hr != NULL,
         "claim_region() should have filtered out NULL regions");
  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}

// Refresh _region_limit from the current region's NTAMS, adjusting the
// local finger when the region changed underneath us.
void G1CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void G1CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  clear_region_fields();
}

void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region = NULL;
  _finger = NULL;
  _region_limit = NULL;
}

// Install or clear the task's oop closure; the asserts enforce that it
// strictly alternates between set and unset.
void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

// Re-initialize per-task state for a new marking cycle.
void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");
  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;
  _termination_start_time_ms = 0.0;
}

bool G1CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

// The work-based "clock": invoked periodically (via reached_limit() and a
// few other strategic places) to check all the conditions that should make
// this task abort its current do_marking_step() invocation.
void G1CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // NOTE(review): the step numbering jumps from (2) to (4); a former
  // step (3) appears to have been removed while the numbering was kept.
  // (4) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    return;
  }
}

// Reset the scanning limits a full period ahead of the current counters.
void G1CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit = _real_words_scanned_limit;

  _real_refs_reached_limit = _refs_reached + refs_reached_period;
  _refs_reached_limit = _real_refs_reached_limit;
}

void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

// Pop up to one chunk's worth of entries from the local queue and push
// them to the global mark stack; aborts the task if the push fails
// (global stack out of memory).
void G1CMTask::move_entries_to_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the local queue.
  oop buffer[G1CMMarkStack::OopsPerChunk];

  size_t n = 0;
  oop obj;
  while (n < G1CMMarkStack::OopsPerChunk && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }
  if (n < G1CMMarkStack::OopsPerChunk) {
    // NULL-terminate a partially filled chunk.
    buffer[n] = NULL;
  }

  if (n > 0) {
    if (!_cm->mark_stack_push(buffer)) {
      set_has_aborted();
    }
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}

// Pop one chunk from the global mark stack and push its (NULL-terminated)
// entries onto the local queue. Returns false if the global stack was empty.
bool G1CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[G1CMMarkStack::OopsPerChunk];

  if (!_cm->mark_stack_pop(buffer)) {
    return false;
  }

  // We did actually pop at least one entry.
  for (size_t i = 0; i < G1CMMarkStack::OopsPerChunk; ++i) {
    oop elem = buffer[i];
    if (elem == NULL) {
      break;
    }
    assert(G1CMObjArrayProcessor::is_array_slice(elem) || elem->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(elem));
    bool success = _task_queue->push(elem);
    // We only call this when the local queue is empty or under a
    // given target limit. So, we do not expect this push to fail.
    assert(success, "invariant");
  }

  // This operation was quite expensive, so decrease the limits
  decrease_limits();
  return true;
}

void G1CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) {
    return;
  }

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      scan_object(obj);
      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }
  }
}

void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the raciness
  // of the mark stack size update we might in fact drop below the target. But,
  // this is not a problem.
  // In case of total draining, we simply process until the global mark stack is
  // totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}

// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. this is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    regular_clock_call();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}

void G1CMTask::print_stats() {
  log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
                       _worker_id, _calls);
  log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                       _elapsed_time_ms, _termination_time_ms);
  log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                       _step_times_ms.num(), _step_times_ms.avg(),
                       _step_times_ms.sd());
  log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms",
                       _step_times_ms.maximum(), _step_times_ms.sum());
}

bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
  return _task_queues->steal(worker_id, hash_seed, obj);
}

/*****************************************************************************

 The do_marking_step(time_target_ms, ...) method is the building
 block of the parallel marking framework. It can be called in parallel
 with other invocations of do_marking_step() on different tasks
 (but only one per task, obviously) and concurrently with the
 mutator threads, or during remark, hence it eliminates the need
 for two versions of the code. When called during remark, it will
 pick up from where the task left off during the concurrent marking
 phase. Interestingly, tasks are also claimable during evacuation
 pauses too, since do_marking_step() ensures that it aborts before
 it needs to yield.
2558 2559 The data structures that it uses to do marking work are the 2560 following: 2561 2562 (1) Marking Bitmap. If there are gray objects that appear only 2563 on the bitmap (this happens either when dealing with an overflow 2564 or when the initial marking phase has simply marked the roots 2565 and didn't push them on the stack), then tasks claim heap 2566 regions whose bitmap they then scan to find gray objects. A 2567 global finger indicates where the end of the last claimed region 2568 is. A local finger indicates how far into the region a task has 2569 scanned. The two fingers are used to determine how to gray an 2570 object (i.e. whether simply marking it is OK, as it will be 2571 visited by a task in the future, or whether it needs to be also 2572 pushed on a stack). 2573 2574 (2) Local Queue. The local queue of the task which is accessed 2575 reasonably efficiently by the task. Other tasks can steal from 2576 it when they run out of work. Throughout the marking phase, a 2577 task attempts to keep its local queue short but not totally 2578 empty, so that entries are available for stealing by other 2579 tasks. Only when there is no more work, a task will totally 2580 drain its local queue. 2581 2582 (3) Global Mark Stack. This handles local queue overflow. During 2583 marking only sets of entries are moved between it and the local 2584 queues, as access to it requires a mutex and more fine-grain 2585 interaction with it which might cause contention. If it 2586 overflows, then the marking phase should restart and iterate 2587 over the bitmap to identify gray objects. Throughout the marking 2588 phase, tasks attempt to keep the global mark stack at a small 2589 length but not totally empty, so that entries are available for 2590 popping by other tasks. Only when there is no more work, tasks 2591 will totally drain the global mark stack. 2592 2593 (4) SATB Buffer Queue. This is where completed SATB buffers are 2594 made available. 
 Buffers are regularly removed from this queue
 and scanned for roots, so that the queue doesn't get too
 long. During remark, all completed buffers are processed, as
 well as the filled-in parts of any uncompleted buffers.

 The do_marking_step() method tries to abort when the time target
 has been reached. There are a few other cases when the
 do_marking_step() method also aborts:

 (1) When the marking phase has been aborted (after a Full GC).

 (2) When a global overflow (on the global stack) has been
 triggered. Before the task aborts, it will actually sync up with
 the other tasks to ensure that all the marking data structures
 (local queues, stacks, fingers etc.) are re-initialized so that
 when do_marking_step() completes, the marking phase can
 immediately restart.

 (3) When enough completed SATB buffers are available. The
 do_marking_step() method only tries to drain SATB buffers right
 at the beginning. So, if enough buffers are available, the
 marking step aborts and the SATB buffers are processed at
 the beginning of the next invocation.

 (4) To yield. When we have to yield, we abort and yield
 right at the end of do_marking_step(). This saves us from a lot
 of hassle as, by yielding, we might allow a Full GC. If this
 happens then objects will be compacted underneath our feet, the
 heap might shrink, etc. We save checking for this by just
 aborting and doing the yield right at the end.

 From the above it follows that the do_marking_step() method should
 be called in a loop (or, otherwise, regularly) until it completes.

 If a marking step completes without its has_aborted() flag being
 true, it means it has completed the current marking phase (and
 also all other marking tasks have done so and have all synced up).
2631 2632 A method called regular_clock_call() is invoked "regularly" (in 2633 sub ms intervals) throughout marking. It is this clock method that 2634 checks all the abort conditions which were mentioned above and 2635 decides when the task should abort. A work-based scheme is used to 2636 trigger this clock method: when the number of object words the 2637 marking phase has scanned or the number of references the marking 2638 phase has visited reach a given limit. Additional invocations to 2639 the method clock have been planted in a few other strategic places 2640 too. The initial reason for the clock method was to avoid calling 2641 vtime too regularly, as it is quite expensive. So, once it was in 2642 place, it was natural to piggy-back all the other conditions on it 2643 too and not constantly check them throughout the code. 2644 2645 If do_termination is true then do_marking_step will enter its 2646 termination protocol. 2647 2648 The value of is_serial must be true when do_marking_step is being 2649 called serially (i.e. by the VMThread) and do_marking_step should 2650 skip any synchronization in the termination and overflow code. 2651 Examples include the serial remark code and the serial reference 2652 processing closures. 2653 2654 The value of is_serial must be false when do_marking_step is 2655 being called by any of the worker threads in a work gang. 2656 Examples include the concurrent marking code (CMMarkingTask), 2657 the MT remark code, and the MT reference processing closures. 
2658 2659 *****************************************************************************/ 2660 2661 void G1CMTask::do_marking_step(double time_target_ms, 2662 bool do_termination, 2663 bool is_serial) { 2664 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2665 assert(concurrent() == _cm->concurrent(), "they should be the same"); 2666 2667 G1Policy* g1_policy = _g1h->g1_policy(); 2668 assert(_task_queues != NULL, "invariant"); 2669 assert(_task_queue != NULL, "invariant"); 2670 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant"); 2671 2672 assert(!_claimed, 2673 "only one thread should claim this task at any one time"); 2674 2675 // OK, this doesn't safeguard again all possible scenarios, as it is 2676 // possible for two threads to set the _claimed flag at the same 2677 // time. But it is only for debugging purposes anyway and it will 2678 // catch most problems. 2679 _claimed = true; 2680 2681 _start_time_ms = os::elapsedVTime() * 1000.0; 2682 2683 // If do_stealing is true then do_marking_step will attempt to 2684 // steal work from the other G1CMTasks. It only makes sense to 2685 // enable stealing when the termination protocol is enabled 2686 // and do_marking_step() is not being called serially. 2687 bool do_stealing = do_termination && !is_serial; 2688 2689 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2690 _time_target_ms = time_target_ms - diff_prediction_ms; 2691 2692 // set up the variables that are used in the work-based scheme to 2693 // call the regular clock method 2694 _words_scanned = 0; 2695 _refs_reached = 0; 2696 recalculate_limits(); 2697 2698 // clear all flags 2699 clear_has_aborted(); 2700 _has_timed_out = false; 2701 _draining_satb_buffers = false; 2702 2703 ++_calls; 2704 2705 // Set up the bitmap and oop closures. Anything that uses them is 2706 // eventually called from this method, so it is OK to allocate these 2707 // statically. 
2708 G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 2709 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 2710 set_cm_oop_closure(&cm_oop_closure); 2711 2712 if (_cm->has_overflown()) { 2713 // This can happen if the mark stack overflows during a GC pause 2714 // and this task, after a yield point, restarts. We have to abort 2715 // as we need to get into the overflow protocol which happens 2716 // right at the end of this task. 2717 set_has_aborted(); 2718 } 2719 2720 // First drain any available SATB buffers. After this, we will not 2721 // look at SATB buffers before the next invocation of this method. 2722 // If enough completed SATB buffers are queued up, the regular clock 2723 // will abort this task so that it restarts. 2724 drain_satb_buffers(); 2725 // ...then partially drain the local queue and the global stack 2726 drain_local_queue(true); 2727 drain_global_stack(true); 2728 2729 do { 2730 if (!has_aborted() && _curr_region != NULL) { 2731 // This means that we're already holding on to a region. 2732 assert(_finger != NULL, "if region is not NULL, then the finger " 2733 "should not be NULL either"); 2734 2735 // We might have restarted this task after an evacuation pause 2736 // which might have evacuated the region we're holding on to 2737 // underneath our feet. Let's read its limit again to make sure 2738 // that we do not iterate over a region of the heap that 2739 // contains garbage (update_region_limit() will also move 2740 // _finger to the start of the region if it is found empty). 2741 update_region_limit(); 2742 // We will start from _finger not from the start of the region, 2743 // as we might be restarting this task after aborting half-way 2744 // through scanning this region. In this case, _finger points to 2745 // the address where we last found a marked object. If this is a 2746 // fresh region, _finger points to start(). 
2747 MemRegion mr = MemRegion(_finger, _region_limit); 2748 2749 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2750 "humongous regions should go around loop once only"); 2751 2752 // Some special cases: 2753 // If the memory region is empty, we can just give up the region. 2754 // If the current region is humongous then we only need to check 2755 // the bitmap for the bit associated with the start of the object, 2756 // scan the object if it's live, and give up the region. 2757 // Otherwise, let's iterate over the bitmap of the part of the region 2758 // that is left. 2759 // If the iteration is successful, give up the region. 2760 if (mr.is_empty()) { 2761 giveup_current_region(); 2762 regular_clock_call(); 2763 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2764 if (_nextMarkBitMap->isMarked(mr.start())) { 2765 // The object is marked - apply the closure 2766 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 2767 bitmap_closure.do_bit(offset); 2768 } 2769 // Even if this task aborted while scanning the humongous object 2770 // we can (and should) give up the current region. 2771 giveup_current_region(); 2772 regular_clock_call(); 2773 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 2774 giveup_current_region(); 2775 regular_clock_call(); 2776 } else { 2777 assert(has_aborted(), "currently the only way to do so"); 2778 // The only way to abort the bitmap iteration is to return 2779 // false from the do_bit() method. However, inside the 2780 // do_bit() method we move the _finger to point to the 2781 // object currently being looked at. So, if we bail out, we 2782 // have definitely set _finger to something non-null. 2783 assert(_finger != NULL, "invariant"); 2784 2785 // Region iteration was actually aborted. So now _finger 2786 // points to the address of the object we last scanned. 
If we 2787 // leave it there, when we restart this task, we will rescan 2788 // the object. It is easy to avoid this. We move the finger by 2789 // enough to point to the next possible object header (the 2790 // bitmap knows by how much we need to move it as it knows its 2791 // granularity). 2792 assert(_finger < _region_limit, "invariant"); 2793 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 2794 // Check if bitmap iteration was aborted while scanning the last object 2795 if (new_finger >= _region_limit) { 2796 giveup_current_region(); 2797 } else { 2798 move_finger_to(new_finger); 2799 } 2800 } 2801 } 2802 // At this point we have either completed iterating over the 2803 // region we were holding on to, or we have aborted. 2804 2805 // We then partially drain the local queue and the global stack. 2806 // (Do we really need this?) 2807 drain_local_queue(true); 2808 drain_global_stack(true); 2809 2810 // Read the note on the claim_region() method on why it might 2811 // return NULL with potentially more regions available for 2812 // claiming and why we have to check out_of_regions() to determine 2813 // whether we're done or not. 2814 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2815 // We are going to try to claim a new region. We should have 2816 // given up on the previous one. 2817 // Separated the asserts so that we know which one fires. 2818 assert(_curr_region == NULL, "invariant"); 2819 assert(_finger == NULL, "invariant"); 2820 assert(_region_limit == NULL, "invariant"); 2821 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2822 if (claimed_region != NULL) { 2823 // Yes, we managed to claim one 2824 setup_for_region(claimed_region); 2825 assert(_curr_region == claimed_region, "invariant"); 2826 } 2827 // It is important to call the regular clock here. It might take 2828 // a while to claim a region if, for example, we hit a large 2829 // block of empty regions. 
So we need to call the regular clock 2830 // method once round the loop to make sure it's called 2831 // frequently enough. 2832 regular_clock_call(); 2833 } 2834 2835 if (!has_aborted() && _curr_region == NULL) { 2836 assert(_cm->out_of_regions(), 2837 "at this point we should be out of regions"); 2838 } 2839 } while ( _curr_region != NULL && !has_aborted()); 2840 2841 if (!has_aborted()) { 2842 // We cannot check whether the global stack is empty, since other 2843 // tasks might be pushing objects to it concurrently. 2844 assert(_cm->out_of_regions(), 2845 "at this point we should be out of regions"); 2846 // Try to reduce the number of available SATB buffers so that 2847 // remark has less work to do. 2848 drain_satb_buffers(); 2849 } 2850 2851 // Since we've done everything else, we can now totally drain the 2852 // local queue and global stack. 2853 drain_local_queue(false); 2854 drain_global_stack(false); 2855 2856 // Attempt at work stealing from other task's queues. 2857 if (do_stealing && !has_aborted()) { 2858 // We have not aborted. This means that we have finished all that 2859 // we could. Let's try to do some stealing... 2860 2861 // We cannot check whether the global stack is empty, since other 2862 // tasks might be pushing objects to it concurrently. 2863 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2864 "only way to reach here"); 2865 while (!has_aborted()) { 2866 oop obj; 2867 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 2868 scan_object(obj); 2869 2870 // And since we're towards the end, let's totally drain the 2871 // local queue and global stack. 2872 drain_local_queue(false); 2873 drain_global_stack(false); 2874 } else { 2875 break; 2876 } 2877 } 2878 } 2879 2880 // We still haven't aborted. Now, let's try to get into the 2881 // termination protocol. 
2882 if (do_termination && !has_aborted()) { 2883 // We cannot check whether the global stack is empty, since other 2884 // tasks might be concurrently pushing objects on it. 2885 // Separated the asserts so that we know which one fires. 2886 assert(_cm->out_of_regions(), "only way to reach here"); 2887 assert(_task_queue->size() == 0, "only way to reach here"); 2888 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2889 2890 // The G1CMTask class also extends the TerminatorTerminator class, 2891 // hence its should_exit_termination() method will also decide 2892 // whether to exit the termination protocol or not. 2893 bool finished = (is_serial || 2894 _cm->terminator()->offer_termination(this)); 2895 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2896 _termination_time_ms += 2897 termination_end_time_ms - _termination_start_time_ms; 2898 2899 if (finished) { 2900 // We're all done. 2901 2902 if (_worker_id == 0) { 2903 // let's allow task 0 to do this 2904 if (concurrent()) { 2905 assert(_cm->concurrent_marking_in_progress(), "invariant"); 2906 // we need to set this to false before the next 2907 // safepoint. This way we ensure that the marking phase 2908 // doesn't observe any more heap expansions. 2909 _cm->clear_concurrent_marking_in_progress(); 2910 } 2911 } 2912 2913 // We can now guarantee that the global stack is empty, since 2914 // all other tasks have finished. We separated the guarantees so 2915 // that, if a condition is false, we can immediately find out 2916 // which one. 2917 guarantee(_cm->out_of_regions(), "only way to reach here"); 2918 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2919 guarantee(_task_queue->size() == 0, "only way to reach here"); 2920 guarantee(!_cm->has_overflown(), "only way to reach here"); 2921 } else { 2922 // Apparently there's more work to do. Let's abort this task. It 2923 // will restart it and we can hopefully find more things to do. 
2924 set_has_aborted(); 2925 } 2926 } 2927 2928 // Mainly for debugging purposes to make sure that a pointer to the 2929 // closure which was statically allocated in this frame doesn't 2930 // escape it by accident. 2931 set_cm_oop_closure(NULL); 2932 double end_time_ms = os::elapsedVTime() * 1000.0; 2933 double elapsed_time_ms = end_time_ms - _start_time_ms; 2934 // Update the step history. 2935 _step_times_ms.add(elapsed_time_ms); 2936 2937 if (has_aborted()) { 2938 // The task was aborted for some reason. 2939 if (_has_timed_out) { 2940 double diff_ms = elapsed_time_ms - _time_target_ms; 2941 // Keep statistics of how well we did with respect to hitting 2942 // our target only if we actually timed out (if we aborted for 2943 // other reasons, then the results might get skewed). 2944 _marking_step_diffs_ms.add(diff_ms); 2945 } 2946 2947 if (_cm->has_overflown()) { 2948 // This is the interesting one. We aborted because a global 2949 // overflow was raised. This means we have to restart the 2950 // marking phase and start iterating over regions. However, in 2951 // order to do this we have to make sure that all tasks stop 2952 // what they are doing and re-initialize in a safe manner. We 2953 // will achieve this with the use of two barrier sync points. 2954 2955 if (!is_serial) { 2956 // We only need to enter the sync barrier if being called 2957 // from a parallel context 2958 _cm->enter_first_sync_barrier(_worker_id); 2959 2960 // When we exit this sync barrier we know that all tasks have 2961 // stopped doing marking work. So, it's now safe to 2962 // re-initialize our data structures. At the end of this method, 2963 // task 0 will clear the global data structures. 2964 } 2965 2966 // We clear the local state of this task... 2967 clear_region_fields(); 2968 2969 if (!is_serial) { 2970 // ...and enter the second barrier. 
2971 _cm->enter_second_sync_barrier(_worker_id); 2972 } 2973 // At this point, if we're during the concurrent phase of 2974 // marking, everything has been re-initialized and we're 2975 // ready to restart. 2976 } 2977 } 2978 2979 _claimed = false; 2980 } 2981 2982 G1CMTask::G1CMTask(uint worker_id, 2983 G1ConcurrentMark* cm, 2984 G1CMTaskQueue* task_queue, 2985 G1CMTaskQueueSet* task_queues) 2986 : _g1h(G1CollectedHeap::heap()), 2987 _worker_id(worker_id), _cm(cm), 2988 _objArray_processor(this), 2989 _claimed(false), 2990 _nextMarkBitMap(NULL), _hash_seed(17), 2991 _task_queue(task_queue), 2992 _task_queues(task_queues), 2993 _cm_oop_closure(NULL) { 2994 guarantee(task_queue != NULL, "invariant"); 2995 guarantee(task_queues != NULL, "invariant"); 2996 2997 _marking_step_diffs_ms.add(0.5); 2998 } 2999 3000 // These are formatting macros that are used below to ensure 3001 // consistent formatting. The *_H_* versions are used to format the 3002 // header for a particular value and they should be kept consistent 3003 // with the corresponding macro. Also note that most of the macros add 3004 // the necessary white space (as a prefix) which makes them a bit 3005 // easier to compose. 3006 3007 // All the output lines are prefixed with this string to be able to 3008 // identify them easily in a large log file. 
#define G1PPRL_LINE_PREFIX "###"

// Address-range column: data format and its (word-size dependent) header width.
#define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

// Closure that logs per-region liveness information (one line per region,
// via doHeapRegion) for the given marking phase. The constructor emits the
// table header; the destructor emits the summary footer.
G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(const char* phase_name)
  : _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  // Column titles...
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "code-roots");
  // ...and their units, on a second header line.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "(bytes)");
}

// Accumulate this region's statistics into the running totals and log one
// per-region line. Always returns false so that the heap-region iteration
// continues over all regions.
bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = r->get_type_str();
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  size_t remset_bytes = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  // Accumulate totals for the summary printed by the destructor.
  _total_used_bytes += used_bytes;
  _total_capacity_bytes += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, strong_code_roots_bytes);

  return false;
}

// Log the whole-heap summary (capacity, used/live percentages, remset and
// code-root footprints) accumulated while iterating over the regions.
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // add static memory usages to remembered set sizes
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          perc(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_prev_live_bytes),
                          perc(_total_prev_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_next_live_bytes),
                          perc(_total_next_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_strong_code_roots_bytes));
}