/*
 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1CardLiveData.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/growableArray.hpp"

// Concurrent marking bit map wrapper

G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = align_ptr_up(addr, HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  assert(limit != NULL, "limit must not be NULL");
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

#ifndef PRODUCT
bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize == heap_rs.word_size();
}
#endif

void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t G1CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t G1CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}
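// Sizing note: the bitmap keeps one bit per possible object start, i.e. one
// bit per MinObjAlignmentInBytes of heap. With the usual 8-byte object
// alignment and BitsPerByte == 8, mark_distance() is 8 * 8 = 64, so each
// bitmap byte covers 64 heap bytes and compute_size() reserves
// heap_size / 64 bytes, rounded up to the allocation granularity.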
void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm = BitMapView((BitMap::bm_word_t*) storage->reserved().start(), _bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clear_range(mr);
}

void G1CMBitMap::clear_range(MemRegion mr) {
  // Restrict the range to the part of the heap this bitmap covers. Note that
  // MemRegion::intersection() returns the result rather than modifying the
  // receiver, so the result must be assigned back.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::allocate_or_null(new_capacity);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();

  return true;
}

size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}

bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = align_size_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = align_size_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}
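// Worked example for the conversion above, assuming the chunk layout from
// the header (a TaskQueueEntryChunk holds EntriesPerChunk = 1023 entries
// plus the next pointer, i.e. 1024 G1TaskQueueEntry-sized slots): a
// requested capacity of 4M entries is aligned up and divided by 1024,
// yielding a capacity of 4096 chunks.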
void G1CMMarkStack::expand() {
  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                    old_capacity, new_capacity);
  }
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
  }
}

void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
  elem->next = *list;
  *list = elem;
}

void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
  TaskQueueEntryChunk* result = *list;
  if (result != NULL) {
    *list = (*list)->next;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != NULL) {
    _chunks_in_chunk_list--;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
  // wraparound of _hwm.
  if (_hwm >= _chunk_capacity) {
    return NULL;
  }

  size_t cur_idx = Atomic::add(1, &_hwm) - 1;
  if (cur_idx >= _chunk_capacity) {
    return NULL;
  }

  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
  result->next = NULL;
  return result;
}

bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == NULL) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}
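// Sketch of how a marking task is expected to use par_push_chunk() and
// par_pop_chunk() above (the real callers are in G1CMTask, later in this
// file); "buffer" and "mark_stack" are hypothetical names used only for
// illustration:
//
//   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
//   // ... fill "buffer" from an overflowing local task queue ...
//   if (!mark_stack->par_push_chunk(buffer)) {
//     // Free list empty and backing memory exhausted: the caller must
//     // flag overflow and eventually restart marking.
//   }
//   // ...
//   if (mark_stack->par_pop_chunk(buffer)) {
//     // "buffer" now holds EntriesPerChunk entries to be processed.
//   }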
G1CMRootRegions::G1CMRootRegions() :
  _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}

void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}
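// For example, ParallelGCThreads = 8 scales to MAX2((8 + 2) / 4, 1U) = 2
// marking threads, while values of 1 to 5 all scale to the minimum of 1.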
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_list("Cleanup List"),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _global_mark_stack(),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / os::initial_active_processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  }
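  // Worked example for the G1MarkingOverheadPercent path above (with
  // illustrative flag values): G1MarkingOverheadPercent=10,
  // MaxGCPauseMillis=200, GCPauseIntervalMillis=400 and 8 active processors:
  //   overall_cm_overhead   = 200 * 0.10 / 400   = 0.05 (5% of total CPU time)
  //   cpu_ratio             = 1 / 8              = 0.125
  //   marking_thread_num    = ceil(0.05 / 0.125) = 1
  //   marking_task_overhead = 0.05 / 1 * 8       = 0.4
  //   sleep_factor          = (1 - 0.4) / 0.4    = 1.5
  // i.e. a single marking thread that sleeps 1.5x as long as it runs.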
  assert(ConcGCThreads > 0, "Should have been set");
  log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }
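  // Sizing note (using the 64-bit defaults as assumptions for illustration):
  // with TASKQUEUE_SIZE at 128K entries and a default MarkStackSize of 4M,
  // four marking threads give MIN2(MarkStackSizeMax, MAX2(4M, 4 * 128K)) = 4M
  // entries for the ergonomic value above, so the per-thread term only
  // dominates for very large thread counts or a small explicit MarkStackSize.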
  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void G1ConcurrentMark::reset_marking_state() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();
  }

  clear_has_overflown();
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void G1ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}
G1ConcurrentMark::~G1ConcurrentMark() {
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
    }

    virtual bool doHeapRegion(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
  }

  bool is_complete() {
    return _cl.complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_size_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}
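// Worked example for the sizing in clear_bitmap() above: heap_map_factor()
// is mark_distance(), i.e. 64 heap bytes per bitmap byte on a typical
// 64-bit VM, so an 8 GB heap has a 128 MB bitmap that is split into
// 128 MB / 1 MB = 128 chunk_size() work units, capping the number of
// workers at 128 (and at the gang's active worker count).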
void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");

  clear_bitmap(_nextMarkBitMap, _parallel_workers, true);

  // Clear the live count data. If the marking has been aborted, the abort()
  // call already did that.
  if (!has_aborted()) {
    clear_live_data(_parallel_workers);
    DEBUG_ONLY(verify_live_data_clear());
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
  clear_bitmap((G1CMBitMap*)_prevMarkBitMap, workers, false);
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
  bool _error;
public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1Policy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void G1ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for another thread to also
 * sync up, while that other thread is trying to yield and waiting
 * for the first to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}
class G1CMConcurrentMarkingTask: public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      G1CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->do_yield_check();

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
                            ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~G1CMConcurrentMarkingTask() { }
};
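// For example, with the sleep_factor of 1.5 derived in the constructor's
// G1MarkingOverheadPercent example, a marking step that consumed 10 ms of
// vtime is followed by a 0.010 * 1.5 * 1000.0 = 15 ms sleep, keeping the
// marking thread at its intended 10 / (10 + 15) = 40% duty cycle.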
// Calculates the number of active workers for a concurrent
// phase.
uint G1ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
                                                      1, /* Minimum workers */
                                                      parallel_marking_threads(),
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
         "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
         max_parallel_marking_threads(), n_conc_workers);
  return n_conc_workers;
}

void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;

public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr);
      hr = root_regions->claim_next();
    }
  }
};

void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
                                     // We distribute work on a per-region basis, so starting
                                     // more threads than that is useless.
                                     root_regions()->num_root_regions());
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
    _parallel_workers->run_task(&task, _parallel_marking_threads);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _parallel_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Remark Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  _gc_tracer_cm->report_object_count_after_gc(&is_alive);
}

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const uint old_regions_removed() { return _old_regions_removed; }
  const uint humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

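    // A non-young region that is still in use (used() > 0) but has no live
    // bytes after marking is all garbage and can be freed immediately,
    // without evacuation; live young regions are reclaimed by evacuation
    // pauses instead.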
    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
    AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};
void G1ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  g1h->verifier()->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Cleanup Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  {
    GCTraceTime(Debug, gc) trace("Finalize Live Data");
    finalize_live_data();
  }

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc) trace("Verify Live Data");
    verify_live_data();
  }

  g1h->collector_state()->set_mark_in_progress(false);

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  uint n_workers = _g1h->workers()->active_workers();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
  g1h->workers()->run_task(&g1_par_note_end_task);
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    g1h->scrub_rem_set();
    _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end();

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
  }

  g1h->verifier()->check_bitmaps("Cleanup End");

  g1h->verifier()->verify_region_sets_optional();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();
  g1h->allocation_context_stats().update_after_mark();
}
void G1ConcurrentMark::complete_cleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                  "cleanup list has %u entries",
                                  _cleanup_list.length());

  // No one else should be accessing the _cleanup_list at this point,
  // so it is not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                      "appending %u entries to the secondary_free_list, "
                                      "cleanup list still has %u entries",
                                      tmp_free_list.length(),
                                      _cleanup_list.length());

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }
#ifndef PRODUCT
      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
#endif
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}
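// In do_object_b() above, is_obj_ill() (defined by G1CollectedHeap) reports
// objects that were allocated before the current marking started and have
// not been marked on the next bitmap, i.e. objects that are provably dead
// with respect to the in-progress marking.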
// 'Keep Alive' oop closure used by both serial and parallel reference
// processing. Uses the G1CMTask associated with a worker thread (for serial
// reference processing the G1CMTask for worker 0 is used) to preserve
// (mark) and trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// state. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _is_serial(is_serial) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};
// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure: public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true         /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
we've completely drained the 1534 // entries that were pushed as a result of applying the 'keep alive' 1535 // closure to the entries on the discovered ref lists) or we overflow 1536 // the global marking stack. 1537 // 1538 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted() 1539 // flag while there may still be some work to do. (See the comment at 1540 // the beginning of G1CMTask::do_marking_step() for those conditions - 1541 // one of which is reaching the specified time target.) It is only 1542 // when G1CMTask::do_marking_step() returns without setting the 1543 // has_aborted() flag that the marking step has completed. 1544 1545 _task->do_marking_step(1000000000.0 /* something very large */, 1546 true /* do_termination */, 1547 _is_serial); 1548 } while (_task->has_aborted() && !_cm->has_overflown()); 1549 } 1550 }; 1551 1552 // Implementation of AbstractRefProcTaskExecutor for parallel 1553 // reference processing at the end of G1 concurrent marking 1554 1555 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 1556 private: 1557 G1CollectedHeap* _g1h; 1558 G1ConcurrentMark* _cm; 1559 WorkGang* _workers; 1560 uint _active_workers; 1561 1562 public: 1563 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 1564 G1ConcurrentMark* cm, 1565 WorkGang* workers, 1566 uint n_workers) : 1567 _g1h(g1h), _cm(cm), 1568 _workers(workers), _active_workers(n_workers) { } 1569 1570 // Executes the given task using concurrent marking worker threads. 1571 virtual void execute(ProcessTask& task); 1572 virtual void execute(EnqueueTask& task); 1573 }; 1574 1575 class G1CMRefProcTaskProxy: public AbstractGangTask { 1576 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1577 ProcessTask& _proc_task; 1578 G1CollectedHeap* _g1h; 1579 G1ConcurrentMark* _cm; 1580 1581 public: 1582 G1CMRefProcTaskProxy(ProcessTask& proc_task, 1583 G1CollectedHeap* g1h, 1584 G1ConcurrentMark* cm) : 1585 AbstractGangTask("Process reference objects in parallel"), 1586 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 1587 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1588 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 1589 } 1590 1591 virtual void work(uint worker_id) { 1592 ResourceMark rm; 1593 HandleMark hm; 1594 G1CMTask* task = _cm->task(worker_id); 1595 G1CMIsAliveClosure g1_is_alive(_g1h); 1596 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 1597 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 1598 1599 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1600 } 1601 }; 1602 1603 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 1604 assert(_workers != NULL, "Need parallel worker threads."); 1605 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1606 1607 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 1608 1609 // We need to reset the concurrency level before each 1610 // proxy task execution, so that the termination protocol 1611 // and overflow handling in G1CMTask::do_marking_step() know 1612 // how many workers to wait for. 
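  // (Illustrative note, an inference from the comment above rather than text
  //  from the sources: offer_termination() inside do_marking_step() waits for
  //  all tasks registered via set_concurrency(N), so the N set here must
  //  match the number of workers the gang actually runs below.)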
1613 _cm->set_concurrency(_active_workers); 1614 _workers->run_task(&proc_task_proxy); 1615 } 1616 1617 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 1618 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1619 EnqueueTask& _enq_task; 1620 1621 public: 1622 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 1623 AbstractGangTask("Enqueue reference objects in parallel"), 1624 _enq_task(enq_task) { } 1625 1626 virtual void work(uint worker_id) { 1627 _enq_task.work(worker_id); 1628 } 1629 }; 1630 1631 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 1632 assert(_workers != NULL, "Need parallel worker threads."); 1633 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1634 1635 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 1636 1637 // Not strictly necessary but... 1638 // 1639 // We need to reset the concurrency level before each 1640 // proxy task execution, so that the termination protocol 1641 // and overflow handling in G1CMTask::do_marking_step() know 1642 // how many workers to wait for. 1643 _cm->set_concurrency(_active_workers); 1644 _workers->run_task(&enq_task_proxy); 1645 } 1646 1647 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 1648 if (has_overflown()) { 1649 // Skip processing the discovered references if we have 1650 // overflown the global marking stack. Reference objects 1651 // only get discovered once, so it is OK not to 1652 // de-populate the discovered reference lists. We could have done so, 1653 // but the only benefit would be that, when marking restarts, 1654 // fewer reference objects are discovered. 1655 return; 1656 } 1657 1658 ResourceMark rm; 1659 HandleMark hm; 1660 1661 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1662 1663 // Is alive closure. 1664 G1CMIsAliveClosure g1_is_alive(g1h); 1665 1666 // Inner scope to exclude the cleaning of the string and symbol 1667 // tables from the displayed time. 1668 { 1669 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm); 1670 1671 ReferenceProcessor* rp = g1h->ref_processor_cm(); 1672 1673 // See the comment in G1CollectedHeap::ref_processing_init() 1674 // about how reference processing currently works in G1. 1675 1676 // Set the soft reference policy 1677 rp->setup_policy(clear_all_soft_refs); 1678 assert(_global_mark_stack.is_empty(), "mark stack should be empty"); 1679 1680 // Instances of the 'Keep Alive' and 'Complete GC' closures used 1681 // in serial reference processing. Note these closures are also 1682 // used for serially processing (by the current thread) the 1683 // JNI references during parallel reference processing. 1684 // 1685 // These closures do not need to synchronize with the worker 1686 // threads involved in parallel reference processing as these 1687 // instances are executed serially by the current thread (e.g. 1688 // reference processing is not multi-threaded and is thus 1689 // performed by the current thread instead of a gang worker). 1690 // 1691 // The gang tasks involved in parallel reference processing create 1692 // their own instances of these closures, which do their own 1693 // synchronization among themselves. 1694 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 1695 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 1696 1697 // We need at least one active thread. 
If reference processing 1698 // is not multi-threaded we use the current (VMThread) thread, 1699 // otherwise we use the work gang from the G1CollectedHeap and 1700 // we utilize all the worker threads we can. 1701 bool processing_is_mt = rp->processing_is_mt(); 1702 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 1703 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 1704 1705 // Parallel processing task executor. 1706 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 1707 g1h->workers(), active_workers); 1708 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 1709 1710 // Set the concurrency level. The phase was already set prior to 1711 // executing the remark task. 1712 set_concurrency(active_workers); 1713 1714 // Set the degree of MT processing here. If the discovery was done MT, 1715 // the number of threads involved during discovery could differ from 1716 // the number of active workers. This is OK as long as the discovered 1717 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 1718 rp->set_active_mt_degree(active_workers); 1719 1720 // Process the weak references. 1721 const ReferenceProcessorStats& stats = 1722 rp->process_discovered_references(&g1_is_alive, 1723 &g1_keep_alive, 1724 &g1_drain_mark_stack, 1725 executor, 1726 _gc_timer_cm); 1727 _gc_tracer_cm->report_gc_reference_stats(stats); 1728 1729 // The do_oop work routines of the keep_alive and drain_marking_stack 1730 // oop closures will set the has_overflown flag if we overflow the 1731 // global marking stack. 1732 1733 assert(has_overflown() || _global_mark_stack.is_empty(), 1734 "Mark stack should be empty (unless it has overflown)"); 1735 1736 assert(rp->num_q() == active_workers, "why not"); 1737 1738 rp->enqueue_discovered_references(executor); 1739 1740 rp->verify_no_references_recorded(); 1741 assert(!rp->discovery_enabled(), "Post condition"); 1742 } 1743 1744 if (has_overflown()) { 1745 // We can not trust g1_is_alive if the marking stack overflowed 1746 return; 1747 } 1748 1749 assert(_global_mark_stack.is_empty(), "Marking should have completed"); 1750 1751 // Unload Klasses, String, Symbols, Code Cache, etc. 1752 if (ClassUnloadingWithConcurrentMark) { 1753 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm); 1754 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */); 1755 g1h->complete_cleaning(&g1_is_alive, purged_classes); 1756 } else { 1757 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm); 1758 // No need to clean string table and symbol table as they are treated as strong roots when 1759 // class unloading is disabled. 1760 g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled()); 1761 1762 } 1763 } 1764 1765 void G1ConcurrentMark::swapMarkBitMaps() { 1766 G1CMBitMapRO* temp = _prevMarkBitMap; 1767 _prevMarkBitMap = (G1CMBitMapRO*)_nextMarkBitMap; 1768 _nextMarkBitMap = (G1CMBitMap*) temp; 1769 } 1770 1771 // Closure for marking entries in SATB buffers. 1772 class G1CMSATBBufferClosure : public SATBBufferClosure { 1773 private: 1774 G1CMTask* _task; 1775 G1CollectedHeap* _g1h; 1776 1777 // This is very similar to G1CMTask::deal_with_reference, but with 1778 // more relaxed requirements for the argument, so this must be more 1779 // circumspect about treating the argument as an object. 
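  // Illustrative example (an assumption based on the comment above, not text
  // from the sources): a mutator can log a reference into its SATB buffer and
  // the referenced object can then be reclaimed (e.g. an eagerly reclaimed
  // humongous object) before the buffer is processed, leaving a stale entry.
  // The NTAMS check below filters such entries: anything at or above
  // next_top_at_mark_start() is either newly allocated (implicitly live, so
  // it needs no SATB marking) or stale, and is skipped either way, e.g.:
  //
  //   if (entry >= hr->next_top_at_mark_start()) {
  //     return; // not a provably valid object; do not touch it
  //   }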
1780 void do_entry(void* entry) const { 1781 _task->increment_refs_reached(); 1782 HeapRegion* hr = _g1h->heap_region_containing(entry); 1783 if (entry < hr->next_top_at_mark_start()) { 1784 // Until we get here, we don't know whether entry refers to a valid 1785 // object; it could instead have been a stale reference. 1786 oop obj = static_cast<oop>(entry); 1787 assert(obj->is_oop(true /* ignore mark word */), 1788 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)); 1789 _task->make_reference_grey(obj); 1790 } 1791 } 1792 1793 public: 1794 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h) 1795 : _task(task), _g1h(g1h) { } 1796 1797 virtual void do_buffer(void** buffer, size_t size) { 1798 for (size_t i = 0; i < size; ++i) { 1799 do_entry(buffer[i]); 1800 } 1801 } 1802 }; 1803 1804 class G1RemarkThreadsClosure : public ThreadClosure { 1805 G1CMSATBBufferClosure _cm_satb_cl; 1806 G1CMOopClosure _cm_cl; 1807 MarkingCodeBlobClosure _code_cl; 1808 int _thread_parity; 1809 1810 public: 1811 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) : 1812 _cm_satb_cl(task, g1h), 1813 _cm_cl(g1h, g1h->concurrent_mark(), task), 1814 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 1815 _thread_parity(Threads::thread_claim_parity()) {} 1816 1817 void do_thread(Thread* thread) { 1818 if (thread->is_Java_thread()) { 1819 if (thread->claim_oops_do(true, _thread_parity)) { 1820 JavaThread* jt = (JavaThread*)thread; 1821 1822 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking; 1823 // however, the oops reachable from nmethods have very complex lifecycles: 1824 // * Alive if on the stack of an executing method 1825 // * Weakly reachable otherwise 1826 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 1827 // live by the SATB invariant, but other oops recorded in nmethods may behave differently. 1828 jt->nmethods_do(&_code_cl); 1829 1830 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 1831 } 1832 } else if (thread->is_VM_thread()) { 1833 if (thread->claim_oops_do(true, _thread_parity)) { 1834 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 1835 } 1836 } 1837 } 1838 }; 1839 1840 class G1CMRemarkTask: public AbstractGangTask { 1841 private: 1842 G1ConcurrentMark* _cm; 1843 public: 1844 void work(uint worker_id) { 1845 // Since all available tasks are actually started, we should 1846 // only proceed if we're supposed to be active. 1847 if (worker_id < _cm->active_tasks()) { 1848 G1CMTask* task = _cm->task(worker_id); 1849 task->record_start_time(); 1850 { 1851 ResourceMark rm; 1852 HandleMark hm; 1853 1854 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 1855 Threads::threads_do(&threads_f); 1856 } 1857 1858 do { 1859 task->do_marking_step(1000000000.0 /* something very large */, 1860 true /* do_termination */, 1861 false /* is_serial */); 1862 } while (task->has_aborted() && !_cm->has_overflown()); 1863 // If we overflow, then we do not want to restart. We instead 1864 // want to abort remark and do concurrent marking again. 
1865 task->record_end_time(); 1866 } 1867 } 1868 1869 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) : 1870 AbstractGangTask("Par Remark"), _cm(cm) { 1871 _cm->terminator()->reset_for_reuse(active_workers); 1872 } 1873 }; 1874 1875 void G1ConcurrentMark::checkpointRootsFinalWork() { 1876 ResourceMark rm; 1877 HandleMark hm; 1878 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1879 1880 GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm); 1881 1882 g1h->ensure_parsability(false); 1883 1884 // this is remark, so we'll use up all active threads 1885 uint active_workers = g1h->workers()->active_workers(); 1886 set_concurrency_and_phase(active_workers, false /* concurrent */); 1887 // Leave _parallel_marking_threads at its 1888 // value originally calculated in the G1ConcurrentMark 1889 // constructor and pass values of the active workers 1890 // through the gang in the task. 1891 1892 { 1893 StrongRootsScope srs(active_workers); 1894 1895 G1CMRemarkTask remarkTask(this, active_workers); 1896 // We will start all available threads, even if we decide that the 1897 // active_workers will be fewer. The extra ones will just bail out 1898 // immediately. 1899 g1h->workers()->run_task(&remarkTask); 1900 } 1901 1902 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1903 guarantee(has_overflown() || 1904 satb_mq_set.completed_buffers_num() == 0, 1905 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT, 1906 BOOL_TO_STR(has_overflown()), 1907 satb_mq_set.completed_buffers_num()); 1908 1909 print_stats(); 1910 } 1911 1912 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 1913 // Note we are overriding the read-only view of the prev map here, via 1914 // the cast. 1915 ((G1CMBitMap*)_prevMarkBitMap)->clear_range(mr); 1916 } 1917 1918 HeapRegion* 1919 G1ConcurrentMark::claim_region(uint worker_id) { 1920 // "checkpoint" the finger 1921 HeapWord* finger = _finger; 1922 1923 // _heap_end will not change underneath our feet; it only changes at 1924 // yield points. 1925 while (finger < _heap_end) { 1926 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 1927 1928 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 1929 // Make sure that the reads below do not float before loading curr_region. 1930 OrderAccess::loadload(); 1931 // Above heap_region_containing may return NULL as we always claim regions 1932 // until the end of the heap. In this case, just jump to the next region. 1933 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 1934 1935 // Is the gap between reading the finger and doing the CAS too long? 1936 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 1937 if (res == finger && curr_region != NULL) { 1938 // we succeeded 1939 HeapWord* bottom = curr_region->bottom(); 1940 HeapWord* limit = curr_region->next_top_at_mark_start(); 1941 1942 // notice that _finger == end cannot be guaranteed here, since 1943 // someone else might have moved the finger even further 1944 assert(_finger >= end, "the finger should have moved forward"); 1945 1946 if (limit > bottom) { 1947 return curr_region; 1948 } else { 1949 assert(limit == bottom, 1950 "the region limit should be at bottom"); 1951 // we return NULL and the caller should try calling 1952 // claim_region() again. 
1953 return NULL; 1954 } 1955 } else { 1956 assert(_finger > finger, "the finger should have moved forward"); 1957 // read it again 1958 finger = _finger; 1959 } 1960 } 1961 1962 return NULL; 1963 } 1964 1965 #ifndef PRODUCT 1966 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 1967 private: 1968 G1CollectedHeap* _g1h; 1969 const char* _phase; 1970 int _info; 1971 1972 public: 1973 VerifyNoCSetOops(const char* phase, int info = -1) : 1974 _g1h(G1CollectedHeap::heap()), 1975 _phase(phase), 1976 _info(info) 1977 { } 1978 1979 void operator()(G1TaskQueueEntry task_entry) const { 1980 if (task_entry.is_array_slice()) { 1981 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1982 return; 1983 } 1984 guarantee(task_entry.obj()->is_oop(), 1985 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1986 p2i(task_entry.obj()), _phase, _info); 1987 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1988 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1989 p2i(task_entry.obj()), _phase, _info); 1990 } 1991 }; 1992 1993 void G1ConcurrentMark::verify_no_cset_oops() { 1994 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1995 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 1996 return; 1997 } 1998 1999 // Verify entries on the global mark stack 2000 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 2001 2002 // Verify entries on the task queues 2003 for (uint i = 0; i < _max_worker_id; ++i) { 2004 G1CMTaskQueue* queue = _task_queues->queue(i); 2005 queue->iterate(VerifyNoCSetOops("Queue", i)); 2006 } 2007 2008 // Verify the global finger 2009 HeapWord* global_finger = finger(); 2010 if (global_finger != NULL && global_finger < _heap_end) { 2011 // Since we always iterate over all regions, we might get a NULL HeapRegion 2012 // here. 2013 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2014 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2015 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2016 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2017 } 2018 2019 // Verify the task fingers 2020 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2021 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2022 G1CMTask* task = _tasks[i]; 2023 HeapWord* task_finger = task->finger(); 2024 if (task_finger != NULL && task_finger < _heap_end) { 2025 // See above note on the global finger verification. 
2026 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2027 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2028 !task_hr->in_collection_set(), 2029 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2030 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2031 } 2032 } 2033 } 2034 #endif // PRODUCT 2035 void G1ConcurrentMark::create_live_data() { 2036 _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap); 2037 } 2038 2039 void G1ConcurrentMark::finalize_live_data() { 2040 _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap); 2041 } 2042 2043 void G1ConcurrentMark::verify_live_data() { 2044 _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap); 2045 } 2046 2047 void G1ConcurrentMark::clear_live_data(WorkGang* workers) { 2048 _g1h->g1_rem_set()->clear_card_live_data(workers); 2049 } 2050 2051 #ifdef ASSERT 2052 void G1ConcurrentMark::verify_live_data_clear() { 2053 _g1h->g1_rem_set()->verify_card_live_data_is_clear(); 2054 } 2055 #endif 2056 2057 void G1ConcurrentMark::print_stats() { 2058 if (!log_is_enabled(Debug, gc, stats)) { 2059 return; 2060 } 2061 log_debug(gc, stats)("---------------------------------------------------------------------"); 2062 for (size_t i = 0; i < _active_tasks; ++i) { 2063 _tasks[i]->print_stats(); 2064 log_debug(gc, stats)("---------------------------------------------------------------------"); 2065 } 2066 } 2067 2068 void G1ConcurrentMark::abort() { 2069 if (!cmThread()->during_cycle() || _has_aborted) { 2070 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2071 return; 2072 } 2073 2074 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2075 // concurrent bitmap clearing. 2076 { 2077 GCTraceTime(Debug, gc) trace("Clear Next Bitmap"); 2078 clear_bitmap(_nextMarkBitMap, _g1h->workers(), false); 2079 } 2080 // Note we cannot clear the previous marking bitmap here 2081 // since VerifyDuringGC verifies the objects marked during 2082 // a full GC against the previous bitmap. 2083 2084 { 2085 GCTraceTime(Debug, gc) trace("Clear Live Data"); 2086 clear_live_data(_g1h->workers()); 2087 } 2088 DEBUG_ONLY({ 2089 GCTraceTime(Debug, gc) trace("Verify Live Data Clear"); 2090 verify_live_data_clear(); 2091 }) 2092 // Empty mark stack 2093 reset_marking_state(); 2094 for (uint i = 0; i < _max_worker_id; ++i) { 2095 _tasks[i]->clear_region_fields(); 2096 } 2097 _first_overflow_barrier_sync.abort(); 2098 _second_overflow_barrier_sync.abort(); 2099 _has_aborted = true; 2100 2101 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2102 satb_mq_set.abandon_partial_marking(); 2103 // This can be called either during or outside marking; we'll read 2104 // the expected_active value from the SATB queue set. 2105 satb_mq_set.set_active_all_threads( 2106 false, /* new active value */ 2107 satb_mq_set.is_active() /* expected_active */); 2108 } 2109 2110 static void print_ms_time_info(const char* prefix, const char* name, 2111 NumberSeq& ns) { 2112 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2113 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2114 if (ns.num() > 0) { 2115 log_trace(gc, marking)("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 2116 prefix, ns.sd(), ns.maximum()); 2117 } 2118 } 2119 2120 void G1ConcurrentMark::print_summary_info() { 2121 Log(gc, marking) log; 2122 if (!log.is_trace()) { 2123 return; 2124 } 2125 2126 log.trace(" Concurrent marking:"); 2127 print_ms_time_info(" ", "init marks", _init_times); 2128 print_ms_time_info(" ", "remarks", _remark_times); 2129 { 2130 print_ms_time_info(" ", "final marks", _remark_mark_times); 2131 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2132 2133 } 2134 print_ms_time_info(" ", "cleanups", _cleanup_times); 2135 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2136 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2137 if (G1ScrubRemSets) { 2138 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2139 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2140 } 2141 log.trace(" Total stop_world time = %8.2f s.", 2142 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2143 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2144 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2145 } 2146 2147 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2148 _parallel_workers->print_worker_threads_on(st); 2149 } 2150 2151 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2152 _parallel_workers->threads_do(tc); 2153 } 2154 2155 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2156 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2157 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2158 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2159 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2160 } 2161 2162 // Closure for iteration over bitmaps 2163 class G1CMBitMapClosure : public BitMapClosure { 2164 private: 2165 // the bitmap that is being iterated over 2166 G1CMBitMap* _nextMarkBitMap; 2167 G1ConcurrentMark* _cm; 2168 G1CMTask* _task; 2169 2170 public: 2171 G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) : 2172 _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { } 2173 2174 bool do_bit(size_t offset) { 2175 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2176 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2177 assert(addr < _cm->finger(), "invariant"); 2178 assert(addr >= _task->finger(), "invariant"); 2179 2180 // We move the task's local finger along. 
2181 _task->move_finger_to(addr); 2182 2183 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); 2184 // we only partially drain the local queue and global stack 2185 _task->drain_local_queue(true); 2186 _task->drain_global_stack(true); 2187 2188 // if the has_aborted flag has been raised, we need to bail out of 2189 // the iteration 2190 return !_task->has_aborted(); 2191 } 2192 }; 2193 2194 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2195 ReferenceProcessor* result = g1h->ref_processor_cm(); 2196 assert(result != NULL, "CM reference processor should not be NULL"); 2197 return result; 2198 } 2199 2200 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2201 G1ConcurrentMark* cm, 2202 G1CMTask* task) 2203 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2204 _g1h(g1h), _cm(cm), _task(task) 2205 { } 2206 2207 void G1CMTask::setup_for_region(HeapRegion* hr) { 2208 assert(hr != NULL, 2209 "claim_region() should have filtered out NULL regions"); 2210 _curr_region = hr; 2211 _finger = hr->bottom(); 2212 update_region_limit(); 2213 } 2214 2215 void G1CMTask::update_region_limit() { 2216 HeapRegion* hr = _curr_region; 2217 HeapWord* bottom = hr->bottom(); 2218 HeapWord* limit = hr->next_top_at_mark_start(); 2219 2220 if (limit == bottom) { 2221 // The region was collected underneath our feet. 2222 // We set the finger to bottom to ensure that the bitmap 2223 // iteration that will follow this will not do anything. 2224 // (this is not a condition that holds when we set the region up, 2225 // as the region is not supposed to be empty in the first place) 2226 _finger = bottom; 2227 } else if (limit >= _region_limit) { 2228 assert(limit >= _finger, "peace of mind"); 2229 } else { 2230 assert(limit < _region_limit, "only way to get here"); 2231 // This can happen under some pretty unusual circumstances. An 2232 // evacuation pause empties the region underneath our feet (NTAMS 2233 // at bottom). We then do some allocation in the region (NTAMS 2234 // stays at bottom), followed by the region being used as a GC 2235 // alloc region (NTAMS will move to top() and the objects 2236 // originally below it will be grayed). All objects now marked in 2237 // the region are explicitly grayed, if below the global finger, 2238 // and in fact we do not need to scan anything else. So, we simply 2239 // set _finger to be limit to ensure that the bitmap iteration 2240 // doesn't do anything. 2241 _finger = limit; 2242 } 2243 2244 _region_limit = limit; 2245 } 2246 2247 void G1CMTask::giveup_current_region() { 2248 assert(_curr_region != NULL, "invariant"); 2249 clear_region_fields(); 2250 } 2251 2252 void G1CMTask::clear_region_fields() { 2253 // Values for these three fields that indicate that we're not 2254 // holding on to a region. 
2255 _curr_region = NULL; 2256 _finger = NULL; 2257 _region_limit = NULL; 2258 } 2259 2260 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 2261 if (cm_oop_closure == NULL) { 2262 assert(_cm_oop_closure != NULL, "invariant"); 2263 } else { 2264 assert(_cm_oop_closure == NULL, "invariant"); 2265 } 2266 _cm_oop_closure = cm_oop_closure; 2267 } 2268 2269 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) { 2270 guarantee(nextMarkBitMap != NULL, "invariant"); 2271 _nextMarkBitMap = nextMarkBitMap; 2272 clear_region_fields(); 2273 2274 _calls = 0; 2275 _elapsed_time_ms = 0.0; 2276 _termination_time_ms = 0.0; 2277 _termination_start_time_ms = 0.0; 2278 } 2279 2280 bool G1CMTask::should_exit_termination() { 2281 regular_clock_call(); 2282 // This is called when we are in the termination protocol. We should 2283 // quit if, for some reason, this task wants to abort or the global 2284 // stack is not empty (this means that we can get work from it). 2285 return !_cm->mark_stack_empty() || has_aborted(); 2286 } 2287 2288 void G1CMTask::reached_limit() { 2289 assert(_words_scanned >= _words_scanned_limit || 2290 _refs_reached >= _refs_reached_limit, 2291 "shouldn't have been called otherwise"); 2292 regular_clock_call(); 2293 } 2294 2295 void G1CMTask::regular_clock_call() { 2296 if (has_aborted()) return; 2297 2298 // First, we need to recalculate the words scanned and refs reached 2299 // limits for the next clock call. 2300 recalculate_limits(); 2301 2302 // During the regular clock call we do the following 2303 2304 // (1) If an overflow has been flagged, then we abort. 2305 if (_cm->has_overflown()) { 2306 set_has_aborted(); 2307 return; 2308 } 2309 2310 // If we are not concurrent (i.e. we're doing remark) we don't need 2311 // to check anything else. The other steps are only needed during 2312 // the concurrent marking phase. 2313 if (!concurrent()) return; 2314 2315 // (2) If marking has been aborted for Full GC, then we also abort. 2316 if (_cm->has_aborted()) { 2317 set_has_aborted(); 2318 return; 2319 } 2320 2321 double curr_time_ms = os::elapsedVTime() * 1000.0; 2322 2323 // (3) We check whether we should yield. If we have to, then we abort. 2324 if (SuspendibleThreadSet::should_yield()) { 2325 // We should yield. To do this we abort the task. The caller is 2326 // responsible for yielding. 2327 set_has_aborted(); 2328 return; 2329 } 2330 2331 // (4) We check whether we've reached our time quota. If we have, 2332 // then we abort. 2333 double elapsed_time_ms = curr_time_ms - _start_time_ms; 2334 if (elapsed_time_ms > _time_target_ms) { 2335 set_has_aborted(); 2336 _has_timed_out = true; 2337 return; 2338 } 2339 2340 // (5) Finally, we check whether there are enough completed SATB 2341 // buffers available for processing. If there are, we abort. 
2342 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2343 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2344 // we do need to process SATB buffers, so we'll abort and restart 2345 // the marking task to do so 2346 set_has_aborted(); 2347 return; 2348 } 2349 } 2350 2351 void G1CMTask::recalculate_limits() { 2352 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2353 _words_scanned_limit = _real_words_scanned_limit; 2354 2355 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2356 _refs_reached_limit = _real_refs_reached_limit; 2357 } 2358 2359 void G1CMTask::decrease_limits() { 2360 // This is called when we believe that we're going to do an infrequent 2361 // operation which will increase the per-byte scanned cost (i.e. move 2362 // entries to/from the global stack). It basically tries to decrease the 2363 // scanning limit so that the clock is called earlier. 2364 2365 _words_scanned_limit = _real_words_scanned_limit - 2366 3 * words_scanned_period / 4; 2367 _refs_reached_limit = _real_refs_reached_limit - 2368 3 * refs_reached_period / 4; 2369 } 2370 2371 void G1CMTask::move_entries_to_global_stack() { 2372 // Local array where we'll store the entries that will be popped 2373 // from the local queue. 2374 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2375 2376 size_t n = 0; 2377 G1TaskQueueEntry task_entry; 2378 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) { 2379 buffer[n] = task_entry; 2380 ++n; 2381 } 2382 if (n < G1CMMarkStack::EntriesPerChunk) { 2383 buffer[n] = G1TaskQueueEntry(); 2384 } 2385 2386 if (n > 0) { 2387 if (!_cm->mark_stack_push(buffer)) { 2388 set_has_aborted(); 2389 } 2390 } 2391 2392 // This operation was quite expensive, so decrease the limits. 2393 decrease_limits(); 2394 } 2395 2396 bool G1CMTask::get_entries_from_global_stack() { 2397 // Local array where we'll store the entries that will be popped 2398 // from the global stack. 2399 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2400 2401 if (!_cm->mark_stack_pop(buffer)) { 2402 return false; 2403 } 2404 2405 // We did actually pop at least one entry. 2406 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) { 2407 G1TaskQueueEntry task_entry = buffer[i]; 2408 if (task_entry.is_null()) { 2409 break; 2410 } 2411 assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj())); 2412 bool success = _task_queue->push(task_entry); 2413 // We only call this when the local queue is empty or under a 2414 // given target limit. So, we do not expect this push to fail. 2415 assert(success, "invariant"); 2416 } 2417 2418 // This operation was quite expensive, so decrease the limits 2419 decrease_limits(); 2420 return true; 2421 } 2422 2423 void G1CMTask::drain_local_queue(bool partially) { 2424 if (has_aborted()) { 2425 return; 2426 } 2427 2428 // Decide what the target size is, depending on whether we're going to 2429 // drain it partially (so that other tasks can steal if they run out 2430 // of things to do) or totally (at the very end). 
2431 size_t target_size; 2432 if (partially) { 2433 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 2434 } else { 2435 target_size = 0; 2436 } 2437 2438 if (_task_queue->size() > target_size) { 2439 G1TaskQueueEntry entry; 2440 bool ret = _task_queue->pop_local(entry); 2441 while (ret) { 2442 scan_task_entry(entry); 2443 if (_task_queue->size() <= target_size || has_aborted()) { 2444 ret = false; 2445 } else { 2446 ret = _task_queue->pop_local(entry); 2447 } 2448 } 2449 } 2450 } 2451 2452 void G1CMTask::drain_global_stack(bool partially) { 2453 if (has_aborted()) return; 2454 2455 // We have a policy to drain the local queue before we attempt to 2456 // drain the global stack. 2457 assert(partially || _task_queue->size() == 0, "invariant"); 2458 2459 // Decide what the target size is, depending on whether we're going to 2460 // drain it partially (so that other tasks can steal if they run out 2461 // of things to do) or totally (at the very end). 2462 // Notice that when draining the global mark stack partially, due to the raciness 2463 // of the mark stack size update we might in fact drop below the target. But, 2464 // this is not a problem. 2465 // In case of total draining, we simply process until the global mark stack is 2466 // totally empty, disregarding the size counter. 2467 if (partially) { 2468 size_t const target_size = _cm->partial_mark_stack_size_target(); 2469 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 2470 if (get_entries_from_global_stack()) { 2471 drain_local_queue(partially); 2472 } 2473 } 2474 } else { 2475 while (!has_aborted() && get_entries_from_global_stack()) { 2476 drain_local_queue(partially); 2477 } 2478 } 2479 } 2480 2481 // SATB Queue has several assumptions about whether to call the par or 2482 // non-par versions of the methods. This is why some of the code is 2483 // replicated. We should really get rid of the single-threaded version 2484 // of the code to simplify things. 2485 void G1CMTask::drain_satb_buffers() { 2486 if (has_aborted()) return; 2487 2488 // We set this so that the regular clock knows that we're in the 2489 // middle of draining buffers and doesn't set the abort flag when it 2490 // notices that SATB buffers are available for draining. It'd be 2491 // very counterproductive if it did that. :-) 2492 _draining_satb_buffers = true; 2493 2494 G1CMSATBBufferClosure satb_cl(this, _g1h); 2495 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2496 2497 // This keeps claiming and applying the closure to completed buffers 2498 // until we run out of buffers or we need to abort. 
2499 while (!has_aborted() && 2500 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2501 regular_clock_call(); 2502 } 2503 2504 _draining_satb_buffers = false; 2505 2506 assert(has_aborted() || 2507 concurrent() || 2508 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2509 2510 // again, this was a potentially expensive operation, so decrease the 2511 // limits to get the regular clock call early 2512 decrease_limits(); 2513 } 2514 2515 void G1CMTask::print_stats() { 2516 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 2517 _worker_id, _calls); 2518 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2519 _elapsed_time_ms, _termination_time_ms); 2520 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 2521 _step_times_ms.num(), _step_times_ms.avg(), 2522 _step_times_ms.sd()); 2523 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 2524 _step_times_ms.maximum(), _step_times_ms.sum()); 2525 } 2526 2527 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2528 return _task_queues->steal(worker_id, hash_seed, task_entry); 2529 } 2530 2531 /***************************************************************************** 2532 2533 The do_marking_step(time_target_ms, ...) method is the building 2534 block of the parallel marking framework. It can be called in parallel 2535 with other invocations of do_marking_step() on different tasks 2536 (but only one per task, obviously) and concurrently with the 2537 mutator threads, or during remark, hence it eliminates the need 2538 for two versions of the code. When called during remark, it will 2539 pick up from where the task left off during the concurrent marking 2540 phase. Interestingly, tasks are also claimable during evacuation 2541 pauses, since do_marking_step() ensures that it aborts before 2542 it needs to yield. 2543 2544 The data structures that it uses to do marking work are the 2545 following: 2546 2547 (1) Marking Bitmap. If there are gray objects that appear only 2548 on the bitmap (this happens either when dealing with an overflow 2549 or when the initial marking phase has simply marked the roots 2550 and didn't push them on the stack), then tasks claim heap 2551 regions whose bitmap they then scan to find gray objects. A 2552 global finger indicates where the end of the last claimed region 2553 is. A local finger indicates how far into the region a task has 2554 scanned. The two fingers are used to determine how to gray an 2555 object (i.e. whether simply marking it is OK, as it will be 2556 visited by a task in the future, or whether it also needs to be 2557 pushed on a stack). 2558 2559 (2) Local Queue. The local queue of the task which is accessed 2560 reasonably efficiently by the task. Other tasks can steal from 2561 it when they run out of work. Throughout the marking phase, a 2562 task attempts to keep its local queue short but not totally 2563 empty, so that entries are available for stealing by other 2564 tasks. Only when there is no more work will a task totally 2565 drain its local queue. 2566 2567 (3) Global Mark Stack. This handles local queue overflow. During 2568 marking only sets of entries are moved between it and the local 2569 queues, as access to it requires a mutex, and finer-grained 2570 interaction with it might cause contention. If it 2571 overflows, then the marking phase should restart and iterate 2572 over the bitmap to identify gray objects. 
Throughout the marking 2573 phase, tasks attempt to keep the global mark stack at a small 2574 length but not totally empty, so that entries are available for 2575 popping by other tasks. Only when there is no more work will tasks 2576 totally drain the global mark stack. 2577 2578 (4) SATB Buffer Queue. This is where completed SATB buffers are 2579 made available. Buffers are regularly removed from this queue 2580 and scanned for roots, so that the queue doesn't get too 2581 long. During remark, all completed buffers are processed, as 2582 well as the filled-in parts of any uncompleted buffers. 2583 2584 The do_marking_step() method tries to abort when the time target 2585 has been reached. There are a few other cases when the 2586 do_marking_step() method also aborts: 2587 2588 (1) When the marking phase has been aborted (after a Full GC). 2589 2590 (2) When a global overflow (on the global stack) has been 2591 triggered. Before the task aborts, it will actually sync up with 2592 the other tasks to ensure that all the marking data structures 2593 (local queues, stacks, fingers etc.) are re-initialized so that 2594 when do_marking_step() completes, the marking phase can 2595 immediately restart. 2596 2597 (3) When enough completed SATB buffers are available. The 2598 do_marking_step() method only tries to drain SATB buffers right 2599 at the beginning. So, if enough buffers are available, the 2600 marking step aborts and the SATB buffers are processed at 2601 the beginning of the next invocation. 2602 2603 (4) To yield. When we have to yield, we abort and yield 2604 right at the end of do_marking_step(). This saves us from a lot 2605 of hassle as, by yielding, we might allow a Full GC. If this 2606 happens then objects will be compacted underneath our feet, the 2607 heap might shrink, etc. We save checking for this by just 2608 aborting and doing the yield right at the end. 2609 2610 From the above it follows that the do_marking_step() method should 2611 be called in a loop (or, otherwise, regularly) until it completes. 2612 2613 If a marking step completes without its has_aborted() flag being 2614 true, it means it has completed the current marking phase (and 2615 also all other marking tasks have done so and have all synced up). 2616 2617 A method called regular_clock_call() is invoked "regularly" (in 2618 sub-ms intervals) throughout marking. It is this clock method that 2619 checks all the abort conditions which were mentioned above and 2620 decides when the task should abort. A work-based scheme is used to 2621 trigger this clock method: when the number of object words the 2622 marking phase has scanned or the number of references the marking 2623 phase has visited reach a given limit. Additional invocations of 2624 the clock method have been planted in a few other strategic places 2625 too. The initial reason for the clock method was to avoid calling 2626 vtime too regularly, as it is quite expensive. So, once it was in 2627 place, it was natural to piggy-back all the other conditions on it 2628 too and not constantly check them throughout the code. 2629 2630 If do_termination is true then do_marking_step will enter its 2631 termination protocol. 2632 2633 The value of is_serial must be true when do_marking_step is being 2634 called serially (i.e. by the VMThread) and do_marking_step should 2635 skip any synchronization in the termination and overflow code. 2636 Examples include the serial remark code and the serial reference 2637 processing closures. 
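    As an illustrative sketch (condensed from the call sites in this file,
    not a verbatim copy of them), the two modes are driven like this:

      // Serial: e.g. the VMThread draining the marking stacks during
      // reference processing.
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true  /* do_termination */,
                              true  /* is_serial */);
      } while (task->has_aborted() && !cm->has_overflown());

      // Parallel: e.g. a work gang worker during remark.
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true  /* do_termination */,
                              false /* is_serial */);
      } while (task->has_aborted() && !cm->has_overflown());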
2638 2639 The value of is_serial must be false when do_marking_step is 2640 being called by any of the worker threads in a work gang. 2641 Examples include the concurrent marking code (CMMarkingTask), 2642 the MT remark code, and the MT reference processing closures. 2643 2644 *****************************************************************************/ 2645 2646 void G1CMTask::do_marking_step(double time_target_ms, 2647 bool do_termination, 2648 bool is_serial) { 2649 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2650 assert(concurrent() == _cm->concurrent(), "they should be the same"); 2651 2652 G1Policy* g1_policy = _g1h->g1_policy(); 2653 assert(_task_queues != NULL, "invariant"); 2654 assert(_task_queue != NULL, "invariant"); 2655 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant"); 2656 2657 assert(!_claimed, 2658 "only one thread should claim this task at any one time"); 2659 2660 // OK, this doesn't safeguard against all possible scenarios, as it is 2661 // possible for two threads to set the _claimed flag at the same 2662 // time. But it is only for debugging purposes anyway and it will 2663 // catch most problems. 2664 _claimed = true; 2665 2666 _start_time_ms = os::elapsedVTime() * 1000.0; 2667 2668 // If do_stealing is true then do_marking_step will attempt to 2669 // steal work from the other G1CMTasks. It only makes sense to 2670 // enable stealing when the termination protocol is enabled 2671 // and do_marking_step() is not being called serially. 2672 bool do_stealing = do_termination && !is_serial; 2673 2674 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2675 _time_target_ms = time_target_ms - diff_prediction_ms; 2676 2677 // set up the variables that are used in the work-based scheme to 2678 // call the regular clock method 2679 _words_scanned = 0; 2680 _refs_reached = 0; 2681 recalculate_limits(); 2682 2683 // clear all flags 2684 clear_has_aborted(); 2685 _has_timed_out = false; 2686 _draining_satb_buffers = false; 2687 2688 ++_calls; 2689 2690 // Set up the bitmap and oop closures. Anything that uses them is 2691 // eventually called from this method, so it is OK to allocate these 2692 // on the stack. 2693 G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 2694 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 2695 set_cm_oop_closure(&cm_oop_closure); 2696 2697 if (_cm->has_overflown()) { 2698 // This can happen if the mark stack overflows during a GC pause 2699 // and this task, after a yield point, restarts. We have to abort 2700 // as we need to get into the overflow protocol which happens 2701 // right at the end of this task. 2702 set_has_aborted(); 2703 } 2704 2705 // First drain any available SATB buffers. After this, we will not 2706 // look at SATB buffers before the next invocation of this method. 2707 // If enough completed SATB buffers are queued up, the regular clock 2708 // will abort this task so that it restarts. 2709 drain_satb_buffers(); 2710 // ...then partially drain the local queue and the global stack 2711 drain_local_queue(true); 2712 drain_global_stack(true); 2713 2714 do { 2715 if (!has_aborted() && _curr_region != NULL) { 2716 // This means that we're already holding on to a region. 2717 assert(_finger != NULL, "if region is not NULL, then the finger " 2718 "should not be NULL either"); 2719 2720 // We might have restarted this task after an evacuation pause 2721 // which might have evacuated the region we're holding on to 2722 // underneath our feet. 
Let's read its limit again to make sure 2723 // that we do not iterate over a region of the heap that 2724 // contains garbage (update_region_limit() will also move 2725 // _finger to the start of the region if it is found empty). 2726 update_region_limit(); 2727 // We will start from _finger not from the start of the region, 2728 // as we might be restarting this task after aborting half-way 2729 // through scanning this region. In this case, _finger points to 2730 // the address where we last found a marked object. If this is a 2731 // fresh region, _finger points to start(). 2732 MemRegion mr = MemRegion(_finger, _region_limit); 2733 2734 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2735 "humongous regions should go around loop once only"); 2736 2737 // Some special cases: 2738 // If the memory region is empty, we can just give up the region. 2739 // If the current region is humongous then we only need to check 2740 // the bitmap for the bit associated with the start of the object, 2741 // scan the object if it's live, and give up the region. 2742 // Otherwise, let's iterate over the bitmap of the part of the region 2743 // that is left. 2744 // If the iteration is successful, give up the region. 2745 if (mr.is_empty()) { 2746 giveup_current_region(); 2747 regular_clock_call(); 2748 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2749 if (_nextMarkBitMap->isMarked(mr.start())) { 2750 // The object is marked - apply the closure 2751 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 2752 bitmap_closure.do_bit(offset); 2753 } 2754 // Even if this task aborted while scanning the humongous object 2755 // we can (and should) give up the current region. 2756 giveup_current_region(); 2757 regular_clock_call(); 2758 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 2759 giveup_current_region(); 2760 regular_clock_call(); 2761 } else { 2762 assert(has_aborted(), "currently the only way to do so"); 2763 // The only way to abort the bitmap iteration is to return 2764 // false from the do_bit() method. However, inside the 2765 // do_bit() method we move the _finger to point to the 2766 // object currently being looked at. So, if we bail out, we 2767 // have definitely set _finger to something non-null. 2768 assert(_finger != NULL, "invariant"); 2769 2770 // Region iteration was actually aborted. So now _finger 2771 // points to the address of the object we last scanned. If we 2772 // leave it there, when we restart this task, we will rescan 2773 // the object. It is easy to avoid this. We move the finger by 2774 // enough to point to the next possible object header (the 2775 // bitmap knows by how much we need to move it as it knows its 2776 // granularity). 2777 assert(_finger < _region_limit, "invariant"); 2778 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 2779 // Check if bitmap iteration was aborted while scanning the last object 2780 if (new_finger >= _region_limit) { 2781 giveup_current_region(); 2782 } else { 2783 move_finger_to(new_finger); 2784 } 2785 } 2786 } 2787 // At this point we have either completed iterating over the 2788 // region we were holding on to, or we have aborted. 2789 2790 // We then partially drain the local queue and the global stack. 2791 // (Do we really need this?) 
2792 drain_local_queue(true); 2793 drain_global_stack(true); 2794 2795 // Read the note on the claim_region() method on why it might 2796 // return NULL with potentially more regions available for 2797 // claiming and why we have to check out_of_regions() to determine 2798 // whether we're done or not. 2799 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2800 // We are going to try to claim a new region. We should have 2801 // given up on the previous one. 2802 // Separated the asserts so that we know which one fires. 2803 assert(_curr_region == NULL, "invariant"); 2804 assert(_finger == NULL, "invariant"); 2805 assert(_region_limit == NULL, "invariant"); 2806 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2807 if (claimed_region != NULL) { 2808 // Yes, we managed to claim one 2809 setup_for_region(claimed_region); 2810 assert(_curr_region == claimed_region, "invariant"); 2811 } 2812 // It is important to call the regular clock here. It might take 2813 // a while to claim a region if, for example, we hit a large 2814 // block of empty regions. So we need to call the regular clock 2815 // method once round the loop to make sure it's called 2816 // frequently enough. 2817 regular_clock_call(); 2818 } 2819 2820 if (!has_aborted() && _curr_region == NULL) { 2821 assert(_cm->out_of_regions(), 2822 "at this point we should be out of regions"); 2823 } 2824 } while (_curr_region != NULL && !has_aborted()); 2825 2826 if (!has_aborted()) { 2827 // We cannot check whether the global stack is empty, since other 2828 // tasks might be pushing objects to it concurrently. 2829 assert(_cm->out_of_regions(), 2830 "at this point we should be out of regions"); 2831 // Try to reduce the number of available SATB buffers so that 2832 // remark has less work to do. 2833 drain_satb_buffers(); 2834 } 2835 2836 // Since we've done everything else, we can now totally drain the 2837 // local queue and global stack. 2838 drain_local_queue(false); 2839 drain_global_stack(false); 2840 2841 // Attempt at work stealing from other tasks' queues. 2842 if (do_stealing && !has_aborted()) { 2843 // We have not aborted. This means that we have finished all that 2844 // we could. Let's try to do some stealing... 2845 2846 // We cannot check whether the global stack is empty, since other 2847 // tasks might be pushing objects to it concurrently. 2848 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2849 "only way to reach here"); 2850 while (!has_aborted()) { 2851 G1TaskQueueEntry entry; 2852 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2853 scan_task_entry(entry); 2854 2855 // And since we're towards the end, let's totally drain the 2856 // local queue and global stack. 2857 drain_local_queue(false); 2858 drain_global_stack(false); 2859 } else { 2860 break; 2861 } 2862 } 2863 } 2864 2865 // We still haven't aborted. Now, let's try to get into the 2866 // termination protocol. 2867 if (do_termination && !has_aborted()) { 2868 // We cannot check whether the global stack is empty, since other 2869 // tasks might be concurrently pushing objects on it. 2870 // Separated the asserts so that we know which one fires. 
2871 assert(_cm->out_of_regions(), "only way to reach here"); 2872 assert(_task_queue->size() == 0, "only way to reach here"); 2873 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2874 2875 // The G1CMTask class also extends the TerminatorTerminator class, 2876 // hence its should_exit_termination() method will also decide 2877 // whether to exit the termination protocol or not. 2878 bool finished = (is_serial || 2879 _cm->terminator()->offer_termination(this)); 2880 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2881 _termination_time_ms += 2882 termination_end_time_ms - _termination_start_time_ms; 2883 2884 if (finished) { 2885 // We're all done. 2886 2887 if (_worker_id == 0) { 2888 // let's allow task 0 to do this 2889 if (concurrent()) { 2890 assert(_cm->concurrent_marking_in_progress(), "invariant"); 2891 // we need to set this to false before the next 2892 // safepoint. This way we ensure that the marking phase 2893 // doesn't observe any more heap expansions. 2894 _cm->clear_concurrent_marking_in_progress(); 2895 } 2896 } 2897 2898 // We can now guarantee that the global stack is empty, since 2899 // all other tasks have finished. We separated the guarantees so 2900 // that, if a condition is false, we can immediately find out 2901 // which one. 2902 guarantee(_cm->out_of_regions(), "only way to reach here"); 2903 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2904 guarantee(_task_queue->size() == 0, "only way to reach here"); 2905 guarantee(!_cm->has_overflown(), "only way to reach here"); 2906 } else { 2907 // Apparently there's more work to do. Let's abort this task. The caller 2908 // will restart it, and we can hopefully find more things to do. 2909 set_has_aborted(); 2910 } 2911 } 2912 2913 // Mainly for debugging purposes to make sure that a pointer to the 2914 // closure which was allocated in this frame doesn't 2915 // escape it by accident. 2916 set_cm_oop_closure(NULL); 2917 double end_time_ms = os::elapsedVTime() * 1000.0; 2918 double elapsed_time_ms = end_time_ms - _start_time_ms; 2919 // Update the step history. 2920 _step_times_ms.add(elapsed_time_ms); 2921 2922 if (has_aborted()) { 2923 // The task was aborted for some reason. 2924 if (_has_timed_out) { 2925 double diff_ms = elapsed_time_ms - _time_target_ms; 2926 // Keep statistics of how well we did with respect to hitting 2927 // our target only if we actually timed out (if we aborted for 2928 // other reasons, then the results might get skewed). 2929 _marking_step_diffs_ms.add(diff_ms); 2930 } 2931 2932 if (_cm->has_overflown()) { 2933 // This is the interesting one. We aborted because a global 2934 // overflow was raised. This means we have to restart the 2935 // marking phase and start iterating over regions. However, in 2936 // order to do this we have to make sure that all tasks stop 2937 // what they are doing and re-initialize in a safe manner. We 2938 // will achieve this with the use of two barrier sync points. 2939 2940 if (!is_serial) { 2941 // We only need to enter the sync barrier if being called 2942 // from a parallel context 2943 _cm->enter_first_sync_barrier(_worker_id); 2944 2945 // When we exit this sync barrier we know that all tasks have 2946 // stopped doing marking work. So, it's now safe to 2947 // re-initialize our data structures. At the end of this method, 2948 // task 0 will clear the global data structures. 2949 } 2950 2951 // We clear the local state of this task... 

G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _objArray_processor(this),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX             "###"

#define G1PPRL_ADDR_BASE_FORMAT        " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT             " %-4s"
#define G1PPRL_TYPE_H_FORMAT           " %4s"
#define G1PPRL_BYTE_FORMAT             " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT           " %9s"
#define G1PPRL_DOUBLE_FORMAT           " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT         " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
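
// For illustration, one concrete expansion under the definitions above:
// G1PPRL_SUM_MB_PERC_FORMAT("used") becomes the single string literal
//   " used: %1.2f MB / %1.2f %%"
// so the header and summary lines below are assembled entirely by
// compile-time concatenation of these macros, each contributing its own
// leading white space.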

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(const char* phase_name)
  : _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "(bytes)");
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usages to remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          perc(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_prev_live_bytes),
                          perc(_total_prev_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_next_live_bytes),
                          perc(_total_next_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_strong_code_roots_bytes));
}
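
// Usage sketch (illustrative only; the actual call site lives elsewhere in
// the G1 code): the closure follows the HeapRegionClosure protocol, in
// which doHeapRegion() returning false means "continue iterating", and the
// totals accumulated along the way are printed by the destructor:
//
//   G1PrintRegionLivenessInfoClosure cl("Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//   // The header and one "###" line per region are logged during the
//   // iteration; the SUMMARY footer is logged when cl is destroyed.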