1 /*
   2  * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorState.hpp"
  32 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  33 #include "gc/g1/g1OopClosures.inline.hpp"
  34 #include "gc/g1/g1Policy.hpp"
  35 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  36 #include "gc/g1/g1StringDedup.hpp"
  37 #include "gc/g1/heapRegion.inline.hpp"
  38 #include "gc/g1/heapRegionRemSet.hpp"
  39 #include "gc/g1/heapRegionSet.inline.hpp"
  40 #include "gc/shared/adaptiveSizePolicy.hpp"
  41 #include "gc/shared/gcId.hpp"
  42 #include "gc/shared/gcTimer.hpp"
  43 #include "gc/shared/gcTrace.hpp"
  44 #include "gc/shared/gcTraceTime.inline.hpp"
  45 #include "gc/shared/genOopClosures.inline.hpp"
  46 #include "gc/shared/referencePolicy.hpp"
  47 #include "gc/shared/strongRootsScope.hpp"
  48 #include "gc/shared/suspendibleThreadSet.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "gc/shared/weakProcessor.hpp"
  52 #include "logging/log.hpp"
  53 #include "memory/allocation.hpp"
  54 #include "memory/resourceArea.hpp"
  55 #include "oops/access.inline.hpp"
  56 #include "oops/oop.inline.hpp"
  57 #include "runtime/atomic.hpp"
  58 #include "runtime/handles.inline.hpp"
  59 #include "runtime/java.hpp"
  60 #include "runtime/prefetch.inline.hpp"
  61 #include "services/memTracker.hpp"
  62 #include "utilities/align.hpp"
  63 #include "utilities/growableArray.hpp"
  64 
  65 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  66   assert(addr < _cm->finger(), "invariant");
  67   assert(addr >= _task->finger(), "invariant");
  68 
  69   // We move that task's local finger along.
  70   _task->move_finger_to(addr);
  71 
  72   _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  73   // we only partially drain the local queue and global stack
  74   _task->drain_local_queue(true);
  75   _task->drain_global_stack(true);
  76 
  77   // if the has_aborted flag has been raised, we need to bail out of
  78   // the iteration
  79   return !_task->has_aborted();
  80 }
  81 
  82 G1CMMarkStack::G1CMMarkStack() :
  83   _max_chunk_capacity(0),
  84   _base(NULL),
  85   _chunk_capacity(0) {
  86   set_empty();
  87 }
  88 
  89 bool G1CMMarkStack::resize(size_t new_capacity) {
  90   assert(is_empty(), "Only resize when stack is empty.");
  91   assert(new_capacity <= _max_chunk_capacity,
  92          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
  93 
  94   TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);
  95 
  96   if (new_base == NULL) {
  97     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
  98     return false;
  99   }
 100   // Release old mapping.
 101   if (_base != NULL) {
 102     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 103   }
 104 
 105   _base = new_base;
 106   _chunk_capacity = new_capacity;
 107   set_empty();
 108 
 109   return true;
 110 }
 111 
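      // Alignment, in number of G1TaskQueueEntry elements, that initialize() rounds
      // the requested capacities up to. Using the least common multiple of the OS
      // allocation granularity and the chunk size means that a rounded-up capacity
      // always corresponds to a whole number of chunks whose total size is a
      // multiple of the granularity used by the mmap-based allocation in resize().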
 112 size_t G1CMMarkStack::capacity_alignment() {
 113   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 114 }
 115 
 116 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 117   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 118 
 119   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 120 
 121   _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 122   size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 123 
 124   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 125             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 126             _max_chunk_capacity,
 127             initial_chunk_capacity);
 128 
 129   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 130                 initial_chunk_capacity, _max_chunk_capacity);
 131 
 132   return resize(initial_chunk_capacity);
 133 }
 134 
 135 void G1CMMarkStack::expand() {
 136   if (_chunk_capacity == _max_chunk_capacity) {
 137     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 138     return;
 139   }
 140   size_t old_capacity = _chunk_capacity;
 141   // Double capacity if possible
 142   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 143 
 144   if (resize(new_capacity)) {
 145     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 146                   old_capacity, new_capacity);
 147   } else {
 148     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 149                     old_capacity, new_capacity);
 150   }
 151 }
 152 
 153 G1CMMarkStack::~G1CMMarkStack() {
 154   if (_base != NULL) {
 155     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 156   }
 157 }
 158 
 159 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 160   elem->next = *list;
 161   *list = elem;
 162 }
 163 
 164 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 165   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 166   add_chunk_to_list(&_chunk_list, elem);
 167   _chunks_in_chunk_list++;
 168 }
 169 
 170 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 171   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 172   add_chunk_to_list(&_free_list, elem);
 173 }
 174 
 175 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 176   TaskQueueEntryChunk* result = *list;
 177   if (result != NULL) {
 178     *list = (*list)->next;
 179   }
 180   return result;
 181 }
 182 
 183 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 184   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 185   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 186   if (result != NULL) {
 187     _chunks_in_chunk_list--;
 188   }
 189   return result;
 190 }
 191 
 192 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 193   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 194   return remove_chunk_from_list(&_free_list);
 195 }
 196 
 197 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
 198   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 199   // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 200   // wraparound of _hwm.
 201   if (_hwm >= _chunk_capacity) {
 202     return NULL;
 203   }
 204 
 205   size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
 206   if (cur_idx >= _chunk_capacity) {
 207     return NULL;
 208   }
 209 
 210   TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
 211   result->next = NULL;
 212   return result;
 213 }
 214 
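      // Push a buffer of EntriesPerChunk entries from a task's local array onto the
      // global chunk list. A chunk is taken from the free list first (reusing the
      // memory of previously popped chunks); only if that is empty is a new chunk
      // carved out of the backing array. Returns false if no chunk could be
      // obtained, i.e. the mark stack is full.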
 215 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 216   // Get a new chunk.
 217   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 218 
 219   if (new_chunk == NULL) {
 220     // Did not get a chunk from the free list. Allocate from backing memory.
 221     new_chunk = allocate_new_chunk();
 222 
 223     if (new_chunk == NULL) {
 224       return false;
 225     }
 226   }
 227 
 228   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 229 
 230   add_chunk_to_chunk_list(new_chunk);
 231 
 232   return true;
 233 }
 234 
 235 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 236   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 237 
 238   if (cur == NULL) {
 239     return false;
 240   }
 241 
 242   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 243 
 244   add_chunk_to_free_list(cur);
 245   return true;
 246 }
 247 
 248 void G1CMMarkStack::set_empty() {
 249   _chunks_in_chunk_list = 0;
 250   _hwm = 0;
 251   _chunk_list = NULL;
 252   _free_list = NULL;
 253 }
 254 
 255 G1CMRootRegions::G1CMRootRegions() :
 256   _survivors(NULL), _cm(NULL), _scan_in_progress(false),
 257   _should_abort(false), _claimed_survivor_index(0) { }
 258 
 259 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 260   _survivors = survivors;
 261   _cm = cm;
 262 }
 263 
 264 void G1CMRootRegions::prepare_for_scan() {
 265   assert(!scan_in_progress(), "pre-condition");
 266 
 267   // Currently, only survivors can be root regions.
 268   _claimed_survivor_index = 0;
 269   _scan_in_progress = _survivors->regions()->is_nonempty();
 270   _should_abort = false;
 271 }
 272 
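      // Claim the next unscanned root region, if any. Workers race by atomically
      // post-incrementing _claimed_survivor_index; each claimed index maps to one
      // survivor region, and an index past the end of the survivor list means that
      // all root regions have been handed out, which is signalled by returning NULL.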
 273 HeapRegion* G1CMRootRegions::claim_next() {
 274   if (_should_abort) {
 275     // If someone has set the should_abort flag, we return NULL to
 276     // force the caller to bail out of their loop.
 277     return NULL;
 278   }
 279 
 280   // Currently, only survivors can be root regions.
 281   const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
 282 
 283   int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
 284   if (claimed_index < survivor_regions->length()) {
 285     return survivor_regions->at(claimed_index);
 286   }
 287   return NULL;
 288 }
 289 
 290 uint G1CMRootRegions::num_root_regions() const {
 291   return (uint)_survivors->regions()->length();
 292 }
 293 
 294 void G1CMRootRegions::notify_scan_done() {
 295   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 296   _scan_in_progress = false;
 297   RootRegionScan_lock->notify_all();
 298 }
 299 
 300 void G1CMRootRegions::cancel_scan() {
 301   notify_scan_done();
 302 }
 303 
 304 void G1CMRootRegions::scan_finished() {
 305   assert(scan_in_progress(), "pre-condition");
 306 
 307   // Currently, only survivors can be root regions.
 308   if (!_should_abort) {
 309     assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
 310     assert((uint)_claimed_survivor_index >= _survivors->length(),
 311            "we should have claimed all survivors, claimed index = %u, length = %u",
 312            (uint)_claimed_survivor_index, _survivors->length());
 313   }
 314 
 315   notify_scan_done();
 316 }
 317 
 318 bool G1CMRootRegions::wait_until_scan_finished() {
 319   if (!scan_in_progress()) {
 320     return false;
 321   }
 322 
 323   {
 324     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 325     while (scan_in_progress()) {
 326       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 327     }
 328   }
 329   return true;
 330 }
 331 
 332 // Returns the maximum number of workers to be used in a concurrent
 333 // phase based on the number of GC workers being used in a STW
 334 // phase.
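      // For example (illustrative): num_gc_workers = 8 yields (8 + 2) / 4 = 2
      // concurrent workers; the MAX2 clamp guarantees at least one worker.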
 335 static uint scale_concurrent_worker_threads(uint num_gc_workers) {
 336   return MAX2((num_gc_workers + 2) / 4, 1U);
 337 }
 338 
 339 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 340                                    G1RegionToSpaceMapper* prev_bitmap_storage,
 341                                    G1RegionToSpaceMapper* next_bitmap_storage) :
 342   // _cm_thread set inside the constructor
 343   _g1h(g1h),
 344   _completed_initialization(false),
 345 
 346   _mark_bitmap_1(),
 347   _mark_bitmap_2(),
 348   _prev_mark_bitmap(&_mark_bitmap_1),
 349   _next_mark_bitmap(&_mark_bitmap_2),
 350 
 351   _heap(_g1h->reserved_region()),
 352 
 353   _root_regions(),
 354 
 355   _global_mark_stack(),
 356 
 357   // _finger set in set_non_marking_state
 358 
 359   _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
 360   _max_num_tasks(ParallelGCThreads),
 361   // _num_active_tasks set in set_non_marking_state()
 362   // _tasks set inside the constructor
 363 
 364   _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
 365   _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),
 366 
 367   _first_overflow_barrier_sync(),
 368   _second_overflow_barrier_sync(),
 369 
 370   _has_overflown(false),
 371   _concurrent(false),
 372   _has_aborted(false),
 373   _restart_for_overflow(false),
 374   _concurrent_marking_in_progress(false),
 375   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 376   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 377 
 378   // _verbose_level set below
 379 
 380   _init_times(),
 381   _remark_times(),
 382   _remark_mark_times(),
 383   _remark_weak_ref_times(),
 384   _cleanup_times(),
 385   _total_cleanup_time(0.0),
 386 
 387   _accum_task_vtime(NULL),
 388 
 389   _concurrent_workers(NULL),
 390   _num_concurrent_workers(0),
 391   _max_concurrent_workers(0),
 392 
 393   _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
 394   _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
 395 {
 396   _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 397   _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);
 398 
 399   // Create & start ConcurrentMark thread.
 400   _cm_thread = new ConcurrentMarkThread(this);
 401   if (_cm_thread->osthread() == NULL) {
 402     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 403   }
 404 
 405   assert(CGC_lock != NULL, "CGC_lock must be initialized");
 406 
 407   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 408   satb_qs.set_buffer_size(G1SATBBufferSize);
 409 
 410   _root_regions.init(_g1h->survivor(), this);
 411 
 412   if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
 413     // Calculate the number of concurrent worker threads by scaling
 414     // the number of parallel GC threads.
 415     uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
 416     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 417   }
 418 
 419   assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
 420   if (ConcGCThreads > ParallelGCThreads) {
 421     log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
 422                     ConcGCThreads, ParallelGCThreads);
 423     return;
 424   }
 425 
 426   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 427   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 428 
 429   _num_concurrent_workers = ConcGCThreads;
 430   _max_concurrent_workers = _num_concurrent_workers;
 431 
 432   _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
 433   _concurrent_workers->initialize_workers();
 434 
 435   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 436     size_t mark_stack_size =
 437       MIN2(MarkStackSizeMax,
 438           MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
 439     // Verify that the calculated value for MarkStackSize is in range.
 440     // It would be nice to use the private utility routine from Arguments.
 441     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 442       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 443                       "must be between 1 and " SIZE_FORMAT,
 444                       mark_stack_size, MarkStackSizeMax);
 445       return;
 446     }
 447     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 448   } else {
 449     // Verify MarkStackSize is in range.
 450     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 451       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 452         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 453           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 454                           "must be between 1 and " SIZE_FORMAT,
 455                           MarkStackSize, MarkStackSizeMax);
 456           return;
 457         }
 458       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 459         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 460           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 461                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 462                           MarkStackSize, MarkStackSizeMax);
 463           return;
 464         }
 465       }
 466     }
 467   }
 468 
 469   if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
 470     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 471   }
 472 
 473   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 474   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);
 475 
 476   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 477   _num_active_tasks = _max_num_tasks;
 478 
 479   for (uint i = 0; i < _max_num_tasks; ++i) {
 480     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 481     task_queue->initialize();
 482     _task_queues->register_queue(i, task_queue);
 483 
 484     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());
 485 
 486     _accum_task_vtime[i] = 0.0;
 487   }
 488 
 489   reset_at_marking_complete();
 490   _completed_initialization = true;
 491 }
 492 
 493 void G1ConcurrentMark::reset() {
 494   _has_aborted = false;
 495 
 496   reset_marking_for_restart();
 497 
  498   // Reset all tasks, since different phases will use a different number of active
  499   // threads. So, it's easiest to have all of them ready.
 500   for (uint i = 0; i < _max_num_tasks; ++i) {
 501     _tasks[i]->reset(_next_mark_bitmap);
 502   }
 503 
 504   uint max_regions = _g1h->max_regions();
 505   for (uint i = 0; i < max_regions; i++) {
 506     _top_at_rebuild_starts[i] = NULL;
 507     _region_mark_stats[i].clear();
 508   }
 509 
  510   // We need this to make sure that the flag is on during the evacuation
  511   // pause that the initial mark is piggy-backed on.
 512   set_concurrent_marking_in_progress();
 513 }
 514 
 515 void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
 516   for (uint j = 0; j < _max_num_tasks; ++j) {
 517     _tasks[j]->clear_mark_stats_cache(region_idx);
 518   }
 519   _top_at_rebuild_starts[region_idx] = NULL;
 520   _region_mark_stats[region_idx].clear();
 521 }
 522 
 523 void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
 524   uint const region_idx = r->hrm_index();
 525   if (r->is_humongous()) {
 526     assert(r->is_starts_humongous(), "Got humongous continues region here");
 527     uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
 528     for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
 529       clear_statistics_in_region(j);
 530     }
 531   } else {
 532     clear_statistics_in_region(region_idx);
  533   }
 534 }
 535 
 536 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
 537   assert_at_safepoint_on_vm_thread();
 538 
 539   G1CMBitMap* const bitmap = _g1h->collector_state()->mark_or_rebuild_in_progress() ? _next_mark_bitmap : _prev_mark_bitmap;
 540   // Need to clear mark bit of the humongous object if already set and during a marking cycle.
 541   if (bitmap->is_marked(r->bottom())) {
 542     bitmap->clear(r->bottom());
 543   }
 544 
 545   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
 546     return;
 547   }
 548 
 549   // Clear any statistics about the region gathered so far.
 550   clear_statistics(r);
 551 }
 552 
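      // Reset the global marking state so that marking can either be restarted or
      // abandoned: empty the overflow mark stack and all task queues and move the
      // global finger back to the start of the heap. If we got here because of mark
      // stack overflow, also try to expand the stack and reset the per-region mark
      // stats to a state that is valid for a restart.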
 553 void G1ConcurrentMark::reset_marking_for_restart() {
 554   _global_mark_stack.set_empty();
 555 
 556   // Expand the marking stack, if we have to and if we can.
 557   if (has_overflown()) {
 558     _global_mark_stack.expand();
 559 
 560     uint max_regions = _g1h->max_regions();
 561     for (uint i = 0; i < max_regions; i++) {
 562       _region_mark_stats[i].clear_during_overflow();
 563     }
 564   }
 565 
 566   clear_has_overflown();
 567   _finger = _heap.start();
 568 
 569   for (uint i = 0; i < _max_num_tasks; ++i) {
 570     G1CMTaskQueue* queue = _task_queues->queue(i);
 571     queue->set_empty();
 572   }
 573 }
 574 
 575 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 576   assert(active_tasks <= _max_num_tasks, "we should not have more");
 577 
 578   _num_active_tasks = active_tasks;
 579   // Need to update the three data structures below according to the
 580   // number of active threads for this phase.
 581   _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
 582   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 583   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 584 }
 585 
 586 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 587   set_concurrency(active_tasks);
 588 
 589   _concurrent = concurrent;
 590   // We propagate this to all tasks, not just the active ones.
 591   for (uint i = 0; i < _max_num_tasks; ++i) {
 592     _tasks[i]->set_concurrent(concurrent);
 593   }
 594 
 595   if (concurrent) {
 596     set_concurrent_marking_in_progress();
 597   } else {
 598     // We currently assume that the concurrent flag has been set to
 599     // false before we start remark. At this point we should also be
 600     // in a STW phase.
 601     assert_at_safepoint_on_vm_thread();
 602     assert(!concurrent_marking_in_progress(), "invariant");
 603     assert(out_of_regions(),
 604            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 605            p2i(_finger), p2i(_heap.end()));
 606   }
 607 }
 608 
 609 void G1ConcurrentMark::reset_at_marking_complete() {
 610   // We set the global marking state to some default values when we're
 611   // not doing marking.
 612   reset_marking_for_restart();
 613   _num_active_tasks = 0;
 614   clear_concurrent_marking_in_progress();
 615 }
 616 
 617 G1ConcurrentMark::~G1ConcurrentMark() {
 618   FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
 619   FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
 620   // The G1ConcurrentMark instance is never freed.
 621   ShouldNotReachHere();
 622 }
 623 
 624 class G1ClearBitMapTask : public AbstractGangTask {
 625 public:
 626   static size_t chunk_size() { return M; }
 627 
 628 private:
 629   // Heap region closure used for clearing the given mark bitmap.
 630   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 631   private:
 632     G1CMBitMap* _bitmap;
 633     G1ConcurrentMark* _cm;
 634   public:
  635     G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
 636     }
 637 
 638     virtual bool do_heap_region(HeapRegion* r) {
 639       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 640 
 641       HeapWord* cur = r->bottom();
 642       HeapWord* const end = r->end();
 643 
 644       while (cur < end) {
 645         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 646         _bitmap->clear_range(mr);
 647 
 648         cur += chunk_size_in_words;
 649 
 650         // Abort iteration if after yielding the marking has been aborted.
 651         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 652           return true;
 653         }
 654         // Repeat the asserts from before the start of the closure. We will do them
 655         // as asserts here to minimize their overhead on the product. However, we
 656         // will have them as guarantees at the beginning / end of the bitmap
 657         // clearing to get some checking in the product.
 658         assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
 659         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 660       }
 661       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 662 
 663       return false;
 664     }
 665   };
 666 
 667   G1ClearBitmapHRClosure _cl;
 668   HeapRegionClaimer _hr_claimer;
 669   bool _suspendible; // If the task is suspendible, workers must join the STS.
 670 
 671 public:
 672   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 673     AbstractGangTask("G1 Clear Bitmap"),
 674     _cl(bitmap, suspendible ? cm : NULL),
 675     _hr_claimer(n_workers),
 676     _suspendible(suspendible)
 677   { }
 678 
 679   void work(uint worker_id) {
 680     SuspendibleThreadSetJoiner sts_join(_suspendible);
 681     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 682   }
 683 
 684   bool is_complete() {
 685     return _cl.is_complete();
 686   }
 687 };
 688 
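      // Clear the given bitmap in parallel. Regions are handed out to the workers
      // through a HeapRegionClaimer and cleared in chunks so that the workers can
      // periodically yield. With may_yield the task joins the suspendible thread set
      // and clearing may be cut short if marking gets aborted; without it the caller
      // must be at a safepoint and the iteration is guaranteed to complete.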
 689 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 690   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 691 
 692   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 693   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 694 
 695   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 696 
 697   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 698 
 699   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 700   workers->run_task(&cl, num_workers);
 701   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 702 }
 703 
 704 void G1ConcurrentMark::cleanup_for_next_mark() {
 705   // Make sure that the concurrent mark thread looks to still be in
 706   // the current cycle.
 707   guarantee(cm_thread()->during_cycle(), "invariant");
 708 
 709   // We are finishing up the current cycle by clearing the next
 710   // marking bitmap and getting it ready for the next cycle. During
 711   // this time no other cycle can start. So, let's make sure that this
 712   // is the case.
 713   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 714 
 715   clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);
 716 
 717   // Repeat the asserts from above.
 718   guarantee(cm_thread()->during_cycle(), "invariant");
 719   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 720 }
 721 
 722 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 723   assert_at_safepoint_on_vm_thread();
 724   clear_bitmap(_prev_mark_bitmap, workers, false);
 725 }
 726 
 727 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 728   G1CMBitMap* _bitmap;
 729  public:
 730   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 731   }
 732 
 733   virtual bool do_heap_region(HeapRegion* r) {
  734     // This closure can be called concurrently with the mutator, so we must make sure
  735     // that the result of the get_next_marked_addr() call is compared to the
 736     // value passed to it as limit to detect any found bits.
 737     // end never changes in G1.
 738     HeapWord* end = r->end();
 739     return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
 740   }
 741 };
 742 
 743 bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
 744   CheckBitmapClearHRClosure cl(_next_mark_bitmap);
 745   _g1h->heap_region_iterate(&cl);
 746   return cl.is_complete();
 747 }
 748 
 749 class NoteStartOfMarkHRClosure : public HeapRegionClosure {
 750 public:
 751   bool do_heap_region(HeapRegion* r) {
 752     r->note_start_of_marking();
 753     return false;
 754   }
 755 };
 756 
 757 void G1ConcurrentMark::pre_initial_mark() {
 758   // Initialize marking structures. This has to be done in a STW phase.
 759   reset();
 760 
 761   // For each region note start of marking.
 762   NoteStartOfMarkHRClosure startcl;
 763   _g1h->heap_region_iterate(&startcl);
 764 }
 765 
 766 
 767 void G1ConcurrentMark::post_initial_mark() {
 768   // Start Concurrent Marking weak-reference discovery.
 769   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 770   // enable ("weak") refs discovery
 771   rp->enable_discovery();
 772   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 773 
 774   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  775   // This is the start of the marking cycle; we expect all
  776   // threads to have SATB queues with active set to false.
 777   satb_mq_set.set_active_all_threads(true, /* new active value */
 778                                      false /* expected_active */);
 779 
 780   _root_regions.prepare_for_scan();
 781 
 782   // update_g1_committed() will be called at the end of an evac pause
 783   // when marking is on. So, it's also called at the end of the
 784   // initial-mark pause to update the heap end, if the heap expands
 785   // during it. No need to call it here.
 786 }
 787 
 788 /*
 789  * Notice that in the next two methods, we actually leave the STS
 790  * during the barrier sync and join it immediately afterwards. If we
 791  * do not do this, the following deadlock can occur: one thread could
 792  * be in the barrier sync code, waiting for the other thread to also
 793  * sync up, whereas another one could be trying to yield, while also
 794  * waiting for the other threads to sync up too.
 795  *
 796  * Note, however, that this code is also used during remark and in
 797  * this case we should not attempt to leave / enter the STS, otherwise
 798  * we'll either hit an assert (debug / fastdebug) or deadlock
 799  * (product). So we should only leave / enter the STS if we are
 800  * operating concurrently.
 801  *
  802  * Because the thread that does the sync barrier has left the STS, it
  803  * is possible for it to be suspended while a Full GC or an evacuation
  804  * pause occurs. This is actually safe, since entering the sync
  805  * barrier is one of the last things do_marking_step() does, and it
  806  * doesn't manipulate any data structures afterwards.
 807  */
 808 
 809 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 810   bool barrier_aborted;
 811   {
 812     SuspendibleThreadSetLeaver sts_leave(concurrent());
 813     barrier_aborted = !_first_overflow_barrier_sync.enter();
 814   }
 815 
 816   // at this point everyone should have synced up and not be doing any
 817   // more work
 818 
 819   if (barrier_aborted) {
 820     // If the barrier aborted we ignore the overflow condition and
 821     // just abort the whole marking phase as quickly as possible.
 822     return;
 823   }
 824 }
 825 
 826 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 827   SuspendibleThreadSetLeaver sts_leave(concurrent());
 828   _second_overflow_barrier_sync.enter();
 829 
 830   // at this point everything should be re-initialized and ready to go
 831 }
 832 
 833 class G1CMConcurrentMarkingTask : public AbstractGangTask {
 834   G1ConcurrentMark*     _cm;
 835 public:
 836   void work(uint worker_id) {
 837     assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
 838     ResourceMark rm;
 839 
 840     double start_vtime = os::elapsedVTime();
 841 
 842     {
 843       SuspendibleThreadSetJoiner sts_join;
 844 
 845       assert(worker_id < _cm->active_tasks(), "invariant");
 846 
 847       G1CMTask* task = _cm->task(worker_id);
 848       task->record_start_time();
 849       if (!_cm->has_aborted()) {
 850         do {
 851           task->do_marking_step(G1ConcMarkStepDurationMillis,
 852                                 true  /* do_termination */,
 853                                 false /* is_serial*/);
 854 
 855           _cm->do_yield_check();
 856         } while (!_cm->has_aborted() && task->has_aborted());
 857       }
 858       task->record_end_time();
 859       guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
 860     }
 861 
 862     double end_vtime = os::elapsedVTime();
 863     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 864   }
 865 
 866   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
 867       AbstractGangTask("Concurrent Mark"), _cm(cm) { }
 868 
 869   ~G1CMConcurrentMarkingTask() { }
 870 };
 871 
 872 uint G1ConcurrentMark::calc_active_marking_workers() {
 873   uint result = 0;
 874   if (!UseDynamicNumberOfGCThreads ||
 875       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 876        !ForceDynamicNumberOfGCThreads)) {
 877     result = _max_concurrent_workers;
 878   } else {
 879     result =
 880       AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
 881                                                       1, /* Minimum workers */
 882                                                       _num_concurrent_workers,
 883                                                       Threads::number_of_non_daemon_threads());
  884     // Don't scale the result down by scale_concurrent_worker_threads() because
 885     // that scaling has already gone into "_max_concurrent_workers".
 886   }
 887   assert(result > 0 && result <= _max_concurrent_workers,
 888          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
 889          _max_concurrent_workers, result);
 890   return result;
 891 }
 892 
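      // Scan a single root region by applying G1RootRegionScanClosure to every
      // object between bottom() and top(). The objects below top() form a
      // contiguous, parsable sequence, so the region is simply walked object by
      // object using each object's size.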
 893 void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) {
 894   // Currently, only survivors can be root regions.
 895   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 896   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 897 
 898   const uintx interval = PrefetchScanIntervalInBytes;
 899   HeapWord* curr = hr->bottom();
 900   const HeapWord* end = hr->top();
 901   while (curr < end) {
 902     Prefetch::read(curr, interval);
 903     oop obj = oop(curr);
 904     int size = obj->oop_iterate_size(&cl);
 905     assert(size == obj->size(), "sanity");
 906     curr += size;
 907   }
 908 }
 909 
 910 class G1CMRootRegionScanTask : public AbstractGangTask {
 911   G1ConcurrentMark* _cm;
 912 public:
 913   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 914     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
 915 
 916   void work(uint worker_id) {
 917     assert(Thread::current()->is_ConcurrentGC_thread(),
 918            "this should only be done by a conc GC thread");
 919 
 920     G1CMRootRegions* root_regions = _cm->root_regions();
 921     HeapRegion* hr = root_regions->claim_next();
 922     while (hr != NULL) {
 923       _cm->scan_root_region(hr, worker_id);
 924       hr = root_regions->claim_next();
 925     }
 926   }
 927 };
 928 
 929 void G1ConcurrentMark::scan_root_regions() {
 930   // scan_in_progress() will have been set to true only if there was
 931   // at least one root region to scan. So, if it's false, we
 932   // should not attempt to do any further work.
 933   if (root_regions()->scan_in_progress()) {
 934     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 935 
 936     _num_concurrent_workers = MIN2(calc_active_marking_workers(),
 937                                    // We distribute work on a per-region basis, so starting
 938                                    // more threads than that is useless.
 939                                    root_regions()->num_root_regions());
 940     assert(_num_concurrent_workers <= _max_concurrent_workers,
 941            "Maximum number of marking threads exceeded");
 942 
 943     G1CMRootRegionScanTask task(this);
 944     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
 945                         task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
 946     _concurrent_workers->run_task(&task, _num_concurrent_workers);
 947 
 948     // It's possible that has_aborted() is true here without actually
 949     // aborting the survivor scan earlier. This is OK as it's
 950     // mainly used for sanity checking.
 951     root_regions()->scan_finished();
 952   }
 953 }
 954 
 955 void G1ConcurrentMark::concurrent_cycle_start() {
 956   _gc_timer_cm->register_gc_start();
 957 
 958   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
 959 
 960   _g1h->trace_heap_before_gc(_gc_tracer_cm);
 961 }
 962 
 963 void G1ConcurrentMark::concurrent_cycle_end() {
 964   _g1h->collector_state()->set_clearing_next_bitmap(false);
 965 
 966   _g1h->trace_heap_after_gc(_gc_tracer_cm);
 967 
 968   if (has_aborted()) {
 969     log_info(gc, marking)("Concurrent Mark Abort");
 970     _gc_tracer_cm->report_concurrent_mode_failure();
 971   }
 972 
 973   _gc_timer_cm->register_gc_end();
 974 
 975   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
 976 }
 977 
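      // Concurrent marking proper: determine the number of marking workers for this
      // attempt, publish that concurrency to the tasks and barriers through
      // set_concurrency_and_phase(), and run G1CMConcurrentMarkingTask on the
      // concurrent worker gang. _restart_for_overflow is cleared here and is only
      // set again during remark if the global mark stack overflowed.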
 978 void G1ConcurrentMark::mark_from_roots() {
 979   _restart_for_overflow = false;
 980 
 981   _num_concurrent_workers = calc_active_marking_workers();
 982 
 983   uint active_workers = MAX2(1U, _num_concurrent_workers);
 984 
 985   // Setting active workers is not guaranteed since fewer
 986   // worker threads may currently exist and more may not be
 987   // available.
 988   active_workers = _concurrent_workers->update_active_workers(active_workers);
 989   log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());
 990 
 991   // Parallel task terminator is set in "set_concurrency_and_phase()"
 992   set_concurrency_and_phase(active_workers, true /* concurrent */);
 993 
 994   G1CMConcurrentMarkingTask marking_task(this);
 995   _concurrent_workers->run_task(&marking_task);
 996   print_stats();
 997 }
 998 
 999 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
1000   G1HeapVerifier* verifier = _g1h->verifier();
1001 
1002   verifier->verify_region_sets_optional();
1003 
1004   if (VerifyDuringGC) {
1005     GCTraceTime(Debug, gc, phases) trace(caller, _gc_timer_cm);
1006 
1007     size_t const BufLen = 512;
1008     char buffer[BufLen];
1009 
1010     os::snprintf(buffer, BufLen, "During GC (%s)", caller);
1011     verifier->verify(type, vo, buffer);
1012   }
1013 
1014   verifier->check_bitmaps(caller);
1015 }
1016 
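      // Closure applied to every region during the Remark pause: based on the
      // liveness gathered by marking, it asks the remembered set tracking policy
      // whether the region should have its remembered set rebuilt in the following
      // rebuild phase, and records the region's current top as the
      // "top at rebuild start" (TARS) that bounds the rebuild work.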
1017 class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
1018   G1CollectedHeap* _g1h;
1019   G1ConcurrentMark* _cm;
1020 
1021   uint _num_regions_selected_for_rebuild;  // The number of regions actually selected for rebuild.
1022 
1023   void update_remset_before_rebuild(HeapRegion * hr) {
1024     G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker();
1025 
1026     size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize;
1027     bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
1028     if (selected_for_rebuild) {
1029       _num_regions_selected_for_rebuild++;
1030     }
1031     _cm->update_top_at_rebuild_start(hr);
1032   }
1033 
1034 public:
1035   G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm) :
1036     _g1h(g1h), _cm(cm), _num_regions_selected_for_rebuild(0) { }
1037 
1038   virtual bool do_heap_region(HeapRegion* r) {
1039     update_remset_before_rebuild(r);
1040     return false;
1041   }
1042 
1043   uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
1044 };
1045 
1046 class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
1047   G1CollectedHeap* _g1h;
1048 public:
1049   G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1050 
1051   virtual bool do_heap_region(HeapRegion* r) {
1052     _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r);
1053     return false;
1054   }
1055 };
1056 
1057 void G1ConcurrentMark::remark() {
1058   assert_at_safepoint_on_vm_thread();
1059 
1060   // If a full collection has happened, we should not continue. However we might
1061   // have ended up here as the Remark VM operation has been scheduled already.
1062   if (has_aborted()) {
1063     return;
1064   }
1065 
1066   G1Policy* g1p = _g1h->g1_policy();
1067   g1p->record_concurrent_mark_remark_start();
1068 
1069   double start = os::elapsedTime();
1070 
1071   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");
1072 
1073   {
1074     GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1075     finalize_marking();
1076   }
1077 
1078   double mark_work_end = os::elapsedTime();
1079 
1080   bool const mark_finished = !has_overflown();
1081   if (mark_finished) {
1082     weak_refs_work(false /* clear_all_soft_refs */);
1083 
1084     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1085     // We're done with marking.
 1086     // This is the end of the marking cycle; we expect all
 1087     // threads to have SATB queues with active set to true.
1088     satb_mq_set.set_active_all_threads(false, /* new active value */
1089                                        true /* expected_active */);
1090 
1091     {
 1092       GCTraceTime(Debug, gc, phases) trace("Flush Task Caches");
1093       flush_all_task_caches();
1094     }
1095 
1096     {
 1097       GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking Before Rebuild");
1098       G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this);
1099       _g1h->heap_region_iterate(&cl);
1100       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1101                                       _g1h->num_regions(), cl.num_selected_for_rebuild());
1102     }
1103 
1104     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UseNextMarking, "Remark after");
1105 
1106     assert(!restart_for_overflow(), "sanity");
1107     // Completely reset the marking state since marking completed
1108     reset_at_marking_complete();
1109   } else {
1110     // We overflowed.  Restart concurrent marking.
1111     _restart_for_overflow = true;
1112 
1113     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");
1114 
1115     // Clear the marking state because we will be restarting
1116     // marking due to overflowing the global mark stack.
1117     reset_marking_for_restart();
1118   }
1119 
1120   {
 1121     GCTraceTime(Debug, gc, phases) trace("Report Object Count");
1122     report_object_count();
1123   }
1124 
1125   // Statistics
1126   double now = os::elapsedTime();
1127   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1128   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1129   _remark_times.add((now - start) * 1000.0);
1130 
1131   g1p->record_concurrent_mark_remark_end();
1132 }
1133 
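      // Parallel task run during the Cleanup pause. Each worker claims regions; any
      // region that marking found completely empty (used() > 0 but no live bytes,
      // and neither young nor archive) is freed onto a per-worker local list that is
      // later merged into the global cleanup list, while all other regions only get
      // their remembered set cleanup work done.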
1134 class G1CleanupTask : public AbstractGangTask {
1135   // Per-region work during the Cleanup pause.
1136   class G1CleanupRegionsClosure : public HeapRegionClosure {
1137     G1CollectedHeap* _g1h;
1138     size_t _freed_bytes;
1139     FreeRegionList* _local_cleanup_list;
1140     uint _old_regions_removed;
1141     uint _humongous_regions_removed;
1142     HRRSCleanupTask* _hrrs_cleanup_task;
1143 
1144   public:
1145     G1CleanupRegionsClosure(G1CollectedHeap* g1,
1146                             FreeRegionList* local_cleanup_list,
1147                             HRRSCleanupTask* hrrs_cleanup_task) :
1148       _g1h(g1),
1149       _freed_bytes(0),
1150       _local_cleanup_list(local_cleanup_list),
1151       _old_regions_removed(0),
1152       _humongous_regions_removed(0),
1153       _hrrs_cleanup_task(hrrs_cleanup_task) { }
1154 
1155     size_t freed_bytes() { return _freed_bytes; }
 1156     uint old_regions_removed() const { return _old_regions_removed; }
 1157     uint humongous_regions_removed() const { return _humongous_regions_removed; }
1158 
1159     bool do_heap_region(HeapRegion *hr) {
1160       hr->note_end_of_marking();
1161 
1162       if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
1163         _freed_bytes += hr->used();
1164         hr->set_containing_set(NULL);
1165         if (hr->is_humongous()) {
1166           _humongous_regions_removed++;
1167           _g1h->free_humongous_region(hr, _local_cleanup_list);
1168         } else {
1169           _old_regions_removed++;
1170           _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */);
1171         }
1172         hr->clear_cardtable();
1173         _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
1174         log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
1175       } else {
1176         hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1177       }
1178 
1179       return false;
1180     }
1181   };
1182 
1183   G1CollectedHeap* _g1h;
1184   FreeRegionList* _cleanup_list;
1185   HeapRegionClaimer _hrclaimer;
1186 
1187 public:
1188   G1CleanupTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1189     AbstractGangTask("G1 Cleanup"),
1190     _g1h(g1h),
1191     _cleanup_list(cleanup_list),
1192     _hrclaimer(n_workers) {
1193 
1194     HeapRegionRemSet::reset_for_cleanup_tasks();
1195   }
1196 
1197   void work(uint worker_id) {
1198     FreeRegionList local_cleanup_list("Local Cleanup List");
1199     HRRSCleanupTask hrrs_cleanup_task;
1200     G1CleanupRegionsClosure cl(_g1h,
1201                                &local_cleanup_list,
1202                                &hrrs_cleanup_task);
1203     _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
1204     assert(cl.is_complete(), "Shouldn't have aborted!");
1205 
1206     // Now update the old/humongous region sets
1207     _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
1208     {
1209       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1210       _g1h->decrement_summary_bytes(cl.freed_bytes());
1211 
1212       _cleanup_list->add_ordered(&local_cleanup_list);
1213       assert(local_cleanup_list.is_empty(), "post-condition");
1214 
1215       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1216     }
1217   }
1218 };
1219 
1220 void G1ConcurrentMark::reclaim_empty_regions() {
1221   WorkGang* workers = _g1h->workers();
1222   FreeRegionList empty_regions_list("Empty Regions After Mark List");
1223 
1224   G1CleanupTask cl(_g1h, &empty_regions_list, workers->active_workers());
1225   workers->run_task(&cl);
1226 
1227   if (!empty_regions_list.is_empty()) {
1228     log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
1229     // Now print the empty regions list.
1230     G1HRPrinter* hrp = _g1h->hr_printer();
1231     if (hrp->is_active()) {
1232       FreeRegionListIterator iter(&empty_regions_list);
1233       while (iter.more_available()) {
1234         HeapRegion* hr = iter.get_next();
1235         hrp->cleanup(hr);
1236       }
1237     }
1238     // And actually make them available.
1239     _g1h->prepend_to_freelist(&empty_regions_list);
1240   }
1241 }
1242 
1243 void G1ConcurrentMark::cleanup() {
1244   assert_at_safepoint_on_vm_thread();
1245 
1246   // If a full collection has happened, we shouldn't do this.
1247   if (has_aborted()) {
1248     return;
1249   }
1250 
1251   G1Policy* g1p = _g1h->g1_policy();
1252   g1p->record_concurrent_mark_cleanup_start();
1253 
1254   double start = os::elapsedTime();
1255 
1256   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UseNextMarking, "Cleanup before");
1257 
1258   {
 1259     GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking After Rebuild");
1260     G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
1261     _g1h->heap_region_iterate(&cl);
1262   }
1263 
1264   if (log_is_enabled(Trace, gc, liveness)) {
1265     G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
1266     _g1h->heap_region_iterate(&cl);
1267   }
1268 
1269   // Install newly created mark bitmap as "prev".
1270   swap_mark_bitmaps();
1271   {
 1272     GCTraceTime(Debug, gc, phases) trace("Reclaim Empty Regions");
1273     reclaim_empty_regions();
1274   }
1275 
1276   // Cleanup will have freed any regions completely full of garbage.
1277   // Update the soft reference policy with the new heap occupancy.
1278   Universe::update_heap_info_at_gc();
1279 
1280   // Clean out dead classes and update Metaspace sizes.
1281   if (ClassUnloadingWithConcurrentMark) {
 1282     GCTraceTime(Debug, gc, phases) trace("Purge Metaspace");
1283     ClassLoaderDataGraph::purge();
1284   }
1285   MetaspaceGC::compute_new_size();
1286 
1287   // We reclaimed old regions so we should calculate the sizes to make
1288   // sure we update the old gen/space data.
1289   _g1h->g1mm()->update_sizes();
1290 
1291   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");
1292 
1293   // We need to make this be a "collection" so any collection pause that
1294   // races with it goes around and waits for Cleanup to finish.
1295   _g1h->increment_total_collections();
1296 
1297   // Local statistics
1298   double recent_cleanup_time = (os::elapsedTime() - start);
1299   _total_cleanup_time += recent_cleanup_time;
1300   _cleanup_times.add(recent_cleanup_time);
1301 
1302   {
 1303     GCTraceTime(Debug, gc, phases) trace("Finalize Concurrent Mark Cleanup");
1304     _g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1305   }
1306 }
1307 
1308 // Supporting Object and Oop closures for reference discovery
 1309 // and processing during marking.
1310 
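      // An object is considered alive if its address is non-NULL and it is either
      // outside the G1 reserved heap or not found to be dead ("ill") by the current
      // marking information. Reference processing uses this predicate to decide
      // whether referents are still reachable.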
1311 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1312   HeapWord* addr = (HeapWord*)obj;
1313   return addr != NULL &&
1314          (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj));
1315 }
1316 
 1317 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1318 // Uses the G1CMTask associated with a worker thread (for serial reference
1319 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1320 // trace referent objects.
1321 //
1322 // Using the G1CMTask and embedded local queues avoids having the worker
1323 // threads operating on the global mark stack. This reduces the risk
1324 // of overflowing the stack - which we would rather avoid at this late
1325 // state. Also using the tasks' local queues removes the potential
1326 // of the workers interfering with each other that could occur if
1327 // operating on the global stack.
1328 
1329 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1330   G1ConcurrentMark* _cm;
1331   G1CMTask*         _task;
1332   int               _ref_counter_limit;
1333   int               _ref_counter;
1334   bool              _is_serial;
1335 public:
1336   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
 1337     _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
 1338     _is_serial(is_serial) {
1339     assert(_ref_counter_limit > 0, "sanity");
1340     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1341     _ref_counter = _ref_counter_limit;
1342   }
1343 
1344   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1345   virtual void do_oop(      oop* p) { do_oop_work(p); }
1346 
1347   template <class T> void do_oop_work(T* p) {
1348     if (!_cm->has_overflown()) {
1349       _task->deal_with_reference(p);
1350       _ref_counter--;
1351 
1352       if (_ref_counter == 0) {
1353         // We have dealt with _ref_counter_limit references, pushing them
1354         // and objects reachable from them on to the local stack (and
1355         // possibly the global stack). Call G1CMTask::do_marking_step() to
1356         // process these entries.
1357         //
1358         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1359         // there's nothing more to do (i.e. we're done with the entries that
1360         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1361         // above) or we overflow.
1362         //
1363         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1364         // flag while there may still be some work to do. (See the comment at
1365         // the beginning of G1CMTask::do_marking_step() for those conditions -
1366         // one of which is reaching the specified time target.) It is only
1367         // when G1CMTask::do_marking_step() returns without setting the
1368         // has_aborted() flag that the marking step has completed.
1369         do {
1370           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1371           _task->do_marking_step(mark_step_duration_ms,
1372                                  false      /* do_termination */,
1373                                  _is_serial);
1374         } while (_task->has_aborted() && !_cm->has_overflown());
1375         _ref_counter = _ref_counter_limit;
1376       }
1377     }
1378   }
1379 };
1380 
1381 // 'Drain' oop closure used by both serial and parallel reference processing.
1382 // Uses the G1CMTask associated with a given worker thread (for serial
 1383 // reference processing the G1CMTask for worker 0 is used). Calls the
1384 // do_marking_step routine, with an unbelievably large timeout value,
1385 // to drain the marking data structures of the remaining entries
1386 // added by the 'keep alive' oop closure above.
1387 
1388 class G1CMDrainMarkingStackClosure : public VoidClosure {
1389   G1ConcurrentMark* _cm;
1390   G1CMTask*         _task;
1391   bool              _is_serial;
1392  public:
1393   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1394     _cm(cm), _task(task), _is_serial(is_serial) {
1395     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1396   }
1397 
1398   void do_void() {
1399     do {
1400       // We call G1CMTask::do_marking_step() to completely drain the local
1401       // and global marking stacks of entries pushed by the 'keep alive'
1402       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1403       //
1404       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1405       // if there's nothing more to do (i.e. we've completely drained the
 1406       // entries that were pushed as a result of applying the 'keep alive'
1407       // closure to the entries on the discovered ref lists) or we overflow
1408       // the global marking stack.
1409       //
1410       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1411       // flag while there may still be some work to do. (See the comment at
1412       // the beginning of G1CMTask::do_marking_step() for those conditions -
1413       // one of which is reaching the specified time target.) It is only
1414       // when G1CMTask::do_marking_step() returns without setting the
1415       // has_aborted() flag that the marking step has completed.
1416 
1417       _task->do_marking_step(1000000000.0 /* something very large */,
1418                              true         /* do_termination */,
1419                              _is_serial);
1420     } while (_task->has_aborted() && !_cm->has_overflown());
1421   }
1422 };
1423 
1424 // Implementation of AbstractRefProcTaskExecutor for parallel
1425 // reference processing at the end of G1 concurrent marking
1426 
1427 class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
1428 private:
1429   G1CollectedHeap*  _g1h;
1430   G1ConcurrentMark* _cm;
1431   WorkGang*         _workers;
1432   uint              _active_workers;
1433 
1434 public:
1435   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1436                           G1ConcurrentMark* cm,
1437                           WorkGang* workers,
1438                           uint n_workers) :
1439     _g1h(g1h), _cm(cm),
1440     _workers(workers), _active_workers(n_workers) { }
1441 
1442   // Executes the given task using concurrent marking worker threads.
1443   virtual void execute(ProcessTask& task);
1444   virtual void execute(EnqueueTask& task);
1445 };
1446 
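     // Gang task that runs a reference processing ProcessTask in parallel:
     // each worker applies the task with its own G1CMTask and freshly
     // constructed parallel (is_serial == false) keep-alive and drain closures.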
1447 class G1CMRefProcTaskProxy : public AbstractGangTask {
1448   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1449   ProcessTask&      _proc_task;
1450   G1CollectedHeap*  _g1h;
1451   G1ConcurrentMark* _cm;
1452 
1453 public:
1454   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1455                        G1CollectedHeap* g1h,
1456                        G1ConcurrentMark* cm) :
1457     AbstractGangTask("Process reference objects in parallel"),
1458     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1459     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1460     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1461   }
1462 
1463   virtual void work(uint worker_id) {
1464     ResourceMark rm;
1465     HandleMark hm;
1466     G1CMTask* task = _cm->task(worker_id);
1467     G1CMIsAliveClosure g1_is_alive(_g1h);
1468     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1469     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1470 
1471     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1472   }
1473 };
1474 
1475 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1476   assert(_workers != NULL, "Need parallel worker threads.");
1477   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1478 
1479   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1480 
1481   // We need to reset the concurrency level before each
1482   // proxy task execution, so that the termination protocol
1483   // and overflow handling in G1CMTask::do_marking_step() know
1484   // how many workers to wait for.
1485   _cm->set_concurrency(_active_workers);
1486   _workers->run_task(&proc_task_proxy);
1487 }
1488 
1489 class G1CMRefEnqueueTaskProxy : public AbstractGangTask {
1490   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1491   EnqueueTask& _enq_task;
1492 
1493 public:
1494   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1495     AbstractGangTask("Enqueue reference objects in parallel"),
1496     _enq_task(enq_task) { }
1497 
1498   virtual void work(uint worker_id) {
1499     _enq_task.work(worker_id);
1500   }
1501 };
1502 
1503 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1504   assert(_workers != NULL, "Need parallel worker threads.");
1505   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1506 
1507   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1508 
1509   // Not strictly necessary but...
1510   //
1511   // We need to reset the concurrency level before each
1512   // proxy task execution, so that the termination protocol
1513   // and overflow handling in G1CMTask::do_marking_step() know
1514   // how many workers to wait for.
1515   _cm->set_concurrency(_active_workers);
1516   _workers->run_task(&enq_task_proxy);
1517 }
1518 
1519 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
1520   ResourceMark rm;
1521   HandleMark   hm;
1522 
1523   // Is alive closure.
1524   G1CMIsAliveClosure g1_is_alive(_g1h);
1525 
1526   // Inner scope to exclude the cleaning of the string and symbol
1527   // tables from the displayed time.
1528   {
1529     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1530 
1531     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1532 
1533     // See the comment in G1CollectedHeap::ref_processing_init()
1534     // about how reference processing currently works in G1.
1535 
1536     // Set the soft reference policy
1537     rp->setup_policy(clear_all_soft_refs);
1538     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1539 
1540     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1541     // in serial reference processing. Note these closures are also
1542     // used for serially processing (by the current thread) the
1543     // JNI references during parallel reference processing.
1544     //
1545     // These closures do not need to synchronize with the worker
1546     // threads involved in parallel reference processing as these
1547     // instances are executed serially by the current thread (i.e.
1548     // reference processing is not multi-threaded and is thus
1549     // performed by the current thread instead of a gang worker).
1550     //
1551     // The gang tasks involved in parallel reference processing create
1552     // their own instances of these closures, which do their own
1553     // synchronization among themselves.
1554     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1555     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1556 
1557     // We need at least one active thread. If reference processing
1558     // is not multi-threaded we use the current (VMThread) thread,
1559     // otherwise we use the work gang from the G1CollectedHeap and
1560     // we utilize all the worker threads we can.
1561     bool processing_is_mt = rp->processing_is_mt();
1562     uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
1563     active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
1564 
1565     // Parallel processing task executor.
1566     G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
1567                                               _g1h->workers(), active_workers);
1568     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1569 
1570     // Set the concurrency level. The phase was already set prior to
1571     // executing the remark task.
1572     set_concurrency(active_workers);
1573 
1574     // Set the degree of MT processing here.  If the discovery was done MT,
1575     // the number of threads involved during discovery could differ from
1576     // the number of active workers.  This is OK as long as the discovered
1577     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1578     rp->set_active_mt_degree(active_workers);
1579 
1580     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1581 
1582     // Process the weak references.
1583     const ReferenceProcessorStats& stats =
1584         rp->process_discovered_references(&g1_is_alive,
1585                                           &g1_keep_alive,
1586                                           &g1_drain_mark_stack,
1587                                           executor,
1588                                           &pt);
1589     _gc_tracer_cm->report_gc_reference_stats(stats);
1590     pt.print_all_references();
1591 
1592     // The do_oop work routines of the keep_alive and drain_marking_stack
1593     // oop closures will set the has_overflown flag if we overflow the
1594     // global marking stack.
1595 
1596     assert(has_overflown() || _global_mark_stack.is_empty(),
1597            "Mark stack should be empty (unless it has overflown)");
1598 
1599     assert(rp->num_q() == active_workers, "Number of reference queues should match the number of active workers");
1600 
1601     rp->enqueue_discovered_references(executor, &pt);
1602 
1603     rp->verify_no_references_recorded();
1604 
1605     pt.print_enqueue_phase();
1606 
1607     assert(!rp->discovery_enabled(), "Post condition");
1608   }
1609 
1610   assert(has_overflown() || _global_mark_stack.is_empty(),
1611          "Mark stack should be empty (unless it has overflown)");
1612 
1613   {
1614     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1615     WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
1616   }
1617 
1618   if (has_overflown()) {
1619     // We cannot trust g1_is_alive if the marking stack overflowed.
1620     return;
1621   }
1622 
1623   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1624 
1625   // Unload Klasses, String, Symbols, Code Cache, etc.
1626   if (ClassUnloadingWithConcurrentMark) {
1627     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1628     bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1629     _g1h->complete_cleaning(&g1_is_alive, purged_classes);
1630   } else {
1631     GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1632     // No need to clean string table and symbol table as they are treated as strong roots when
1633     // class unloading is disabled.
1634     _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1635   }
1636 }
1637 
1638 void G1ConcurrentMark::report_object_count() {
1639   G1CMIsAliveClosure is_alive(_g1h);
1640   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1641 }
1642 
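     // Once marking has completed, the next bitmap holds the up-to-date
     // liveness information and becomes the prev bitmap. The old prev bitmap
     // becomes the next bitmap and is flagged for clearing before it is used
     // in the next marking cycle.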
1643 void G1ConcurrentMark::swap_mark_bitmaps() {
1644   G1CMBitMap* temp = _prev_mark_bitmap;
1645   _prev_mark_bitmap = _next_mark_bitmap;
1646   _next_mark_bitmap = temp;
1647   _g1h->collector_state()->set_clearing_next_bitmap(true);
1648 }
1649 
1650 // Closure for marking entries in SATB buffers.
1651 class G1CMSATBBufferClosure : public SATBBufferClosure {
1652 private:
1653   G1CMTask* _task;
1654   G1CollectedHeap* _g1h;
1655 
1656   // This is very similar to G1CMTask::deal_with_reference, but with
1657   // more relaxed requirements for the argument, so this must be more
1658   // circumspect about treating the argument as an object.
1659   void do_entry(void* entry) const {
1660     _task->increment_refs_reached();
1661     oop const obj = static_cast<oop>(entry);
1662     _task->make_reference_grey(obj);
1663   }
1664 
1665 public:
1666   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1667     : _task(task), _g1h(g1h) { }
1668 
1669   virtual void do_buffer(void** buffer, size_t size) {
1670     for (size_t i = 0; i < size; ++i) {
1671       do_entry(buffer[i]);
1672     }
1673   }
1674 };
1675 
1676 class G1RemarkThreadsClosure : public ThreadClosure {
1677   G1CMSATBBufferClosure _cm_satb_cl;
1678   G1CMOopClosure _cm_cl;
1679   MarkingCodeBlobClosure _code_cl;
1680   int _thread_parity;
1681 
1682  public:
1683   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1684     _cm_satb_cl(task, g1h),
1685     _cm_cl(g1h, task),
1686     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1687     _thread_parity(Threads::thread_claim_parity()) {}
1688 
1689   void do_thread(Thread* thread) {
1690     if (thread->is_Java_thread()) {
1691       if (thread->claim_oops_do(true, _thread_parity)) {
1692         JavaThread* jt = (JavaThread*)thread;
1693 
1694         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1695         // however, oops reachable from nmethods have very complex lifecycles:
1696         // * Alive if on the stack of an executing method
1697         // * Weakly reachable otherwise
1698         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1699         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1700         jt->nmethods_do(&_code_cl);
1701 
1702         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1703       }
1704     } else if (thread->is_VM_thread()) {
1705       if (thread->claim_oops_do(true, _thread_parity)) {
1706         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1707       }
1708     }
1709   }
1710 };
1711 
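     // Gang task used during the STW remark pause. Each worker first walks the
     // Java threads and the VM thread (nmethods and SATB buffers, see
     // G1RemarkThreadsClosure above) and then completes marking by calling
     // do_marking_step() with termination enabled.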
1712 class G1CMRemarkTask : public AbstractGangTask {
1713   G1ConcurrentMark* _cm;
1714 public:
1715   void work(uint worker_id) {
1716     G1CMTask* task = _cm->task(worker_id);
1717     task->record_start_time();
1718     {
1719       ResourceMark rm;
1720       HandleMark hm;
1721 
1722       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1723       Threads::threads_do(&threads_f);
1724     }
1725 
1726     do {
1727       task->do_marking_step(1000000000.0 /* something very large */,
1728                             true         /* do_termination       */,
1729                             false        /* is_serial            */);
1730     } while (task->has_aborted() && !_cm->has_overflown());
1731     // If we overflow, then we do not want to restart. We instead
1732     // want to abort remark and do concurrent marking again.
1733     task->record_end_time();
1734   }
1735 
1736   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1737     AbstractGangTask("Par Remark"), _cm(cm) {
1738     _cm->terminator()->reset_for_reuse(active_workers);
1739   }
1740 };
1741 
1742 void G1ConcurrentMark::finalize_marking() {
1743   ResourceMark rm;
1744   HandleMark   hm;
1745 
1746   _g1h->ensure_parsability(false);
1747 
1748   // this is remark, so we'll use up all active threads
1749   uint active_workers = _g1h->workers()->active_workers();
1750   set_concurrency_and_phase(active_workers, false /* concurrent */);
1751   // Leave _parallel_marking_threads at its
1752   // value originally calculated in the G1ConcurrentMark
1753   // constructor and pass the number of active workers
1754   // through the gang to the task.
1755 
1756   {
1757     StrongRootsScope srs(active_workers);
1758 
1759     G1CMRemarkTask remarkTask(this, active_workers);
1760     // We will start all available threads, even if we decide that the
1761     // active_workers will be fewer. The extra ones will just bail out
1762     // immediately.
1763     _g1h->workers()->run_task(&remarkTask);
1764   }
1765 
1766   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1767   guarantee(has_overflown() ||
1768             satb_mq_set.completed_buffers_num() == 0,
1769             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1770             BOOL_TO_STR(has_overflown()),
1771             satb_mq_set.completed_buffers_num());
1772 
1773   print_stats();
1774 }
1775 
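     // Flush each task's mark statistics cache and log the aggregated cache
     // hit/miss ratio.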
1776 void G1ConcurrentMark::flush_all_task_caches() {
1777   size_t hits = 0;
1778   size_t misses = 0;
1779   for (uint i = 0; i < _max_num_tasks; i++) {
1780     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1781     hits += stats.first;
1782     misses += stats.second;
1783   }
1784   size_t sum = hits + misses;
1785   log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
1786                        hits, misses, percent_of(hits, sum));
1787 }
1788 
1789 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1790   _prev_mark_bitmap->clear_range(mr);
1791 }
1792 
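     // Claims a region for marking by atomically advancing the global finger.
     // Several workers may race on the CAS below: the winner returns the
     // claimed region (or NULL if it turned out to be empty, in which case the
     // caller should retry), while losers re-read the finger and try again.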
1793 HeapRegion*
1794 G1ConcurrentMark::claim_region(uint worker_id) {
1795   // "checkpoint" the finger
1796   HeapWord* finger = _finger;
1797 
1798   while (finger < _heap.end()) {
1799     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1800 
1801     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1802     // Make sure that the reads below do not float before loading curr_region.
1803     OrderAccess::loadload();
1804     // heap_region_containing() above may return NULL as we always claim
1805     // regions up to the end of the heap. In this case, just jump to the next region.
1806     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1807 
1808     // Is the gap between reading the finger and doing the CAS too long?
1809     HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
1810     if (res == finger && curr_region != NULL) {
1811       // we succeeded
1812       HeapWord*   bottom        = curr_region->bottom();
1813       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1814 
1815       // Notice that _finger == end cannot be guaranteed here since
1816       // someone else might have moved the finger even further.
1817       assert(_finger >= end, "the finger should have moved forward");
1818 
1819       if (limit > bottom) {
1820         return curr_region;
1821       } else {
1822         assert(limit == bottom,
1823                "the region limit should be at bottom");
1824         // we return NULL and the caller should try calling
1825         // claim_region() again.
1826         return NULL;
1827       }
1828     } else {
1829       assert(_finger > finger, "the finger should have moved forward");
1830       // read it again
1831       finger = _finger;
1832     }
1833   }
1834 
1835   return NULL;
1836 }
1837 
1838 #ifndef PRODUCT
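     // Debug-only functor applied to global mark stack and task queue entries
     // to verify that no entry refers to an object in the collection set.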
1839 class VerifyNoCSetOops {
1840   G1CollectedHeap* _g1h;
1841   const char* _phase;
1842   int _info;
1843 
1844 public:
1845   VerifyNoCSetOops(const char* phase, int info = -1) :
1846     _g1h(G1CollectedHeap::heap()),
1847     _phase(phase),
1848     _info(info)
1849   { }
1850 
1851   void operator()(G1TaskQueueEntry task_entry) const {
1852     if (task_entry.is_array_slice()) {
1853       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1854       return;
1855     }
1856     guarantee(oopDesc::is_oop(task_entry.obj()),
1857               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1858               p2i(task_entry.obj()), _phase, _info);
1859     guarantee(!_g1h->is_in_cset(task_entry.obj()),
1860               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1861               p2i(task_entry.obj()), _phase, _info);
1862   }
1863 };
1864 
1865 void G1ConcurrentMark::verify_no_cset_oops() {
1866   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1867   if (!G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress()) {
1868     return;
1869   }
1870 
1871   // Verify entries on the global mark stack
1872   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1873 
1874   // Verify entries on the task queues
1875   for (uint i = 0; i < _max_num_tasks; ++i) {
1876     G1CMTaskQueue* queue = _task_queues->queue(i);
1877     queue->iterate(VerifyNoCSetOops("Queue", i));
1878   }
1879 
1880   // Verify the global finger
1881   HeapWord* global_finger = finger();
1882   if (global_finger != NULL && global_finger < _heap.end()) {
1883     // Since we always iterate over all regions, we might get a NULL HeapRegion
1884     // here.
1885     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1886     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1887               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1888               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1889   }
1890 
1891   // Verify the task fingers
1892   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1893   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1894     G1CMTask* task = _tasks[i];
1895     HeapWord* task_finger = task->finger();
1896     if (task_finger != NULL && task_finger < _heap.end()) {
1897       // See above note on the global finger verification.
1898       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
1899       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
1900                 !task_hr->in_collection_set(),
1901                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1902                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
1903     }
1904   }
1905 }
1906 #endif // PRODUCT
1907 
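     // Hand off remembered set rebuilding to the remembered set implementation,
     // running it on the concurrent marking workers.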
1908 void G1ConcurrentMark::rebuild_rem_set_concurrently() {
1909   _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset);
1910 }
1911 
1912 void G1ConcurrentMark::print_stats() {
1913   if (!log_is_enabled(Debug, gc, stats)) {
1914     return;
1915   }
1916   log_debug(gc, stats)("---------------------------------------------------------------------");
1917   for (size_t i = 0; i < _num_active_tasks; ++i) {
1918     _tasks[i]->print_stats();
1919     log_debug(gc, stats)("---------------------------------------------------------------------");
1920   }
1921 }
1922 
1923 void G1ConcurrentMark::concurrent_cycle_abort() {
1924   if (!cm_thread()->during_cycle() || _has_aborted) {
1925     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
1926     return;
1927   }
1928 
1929   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
1930   // concurrent bitmap clearing.
1931   {
1932     GCTraceTime(Debug, gc)("Clear Next Bitmap");
1933     clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
1934   }
1935   // Note we cannot clear the previous marking bitmap here
1936   // since VerifyDuringGC verifies the objects marked during
1937   // a full GC against the previous bitmap.
1938 
1939   // Empty mark stack
1940   reset_marking_for_restart();
1941   for (uint i = 0; i < _max_num_tasks; ++i) {
1942     _tasks[i]->clear_region_fields();
1943   }
1944   _first_overflow_barrier_sync.abort();
1945   _second_overflow_barrier_sync.abort();
1946   _has_aborted = true;
1947 
1948   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1949   satb_mq_set.abandon_partial_marking();
1950   // This can be called either during or outside marking, we'll read
1951   // the expected_active value from the SATB queue set.
1952   satb_mq_set.set_active_all_threads(
1953                                  false, /* new active value */
1954                                  satb_mq_set.is_active() /* expected_active */);
1955 }
1956 
1957 static void print_ms_time_info(const char* prefix, const char* name,
1958                                NumberSeq& ns) {
1959   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
1960                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
1961   if (ns.num() > 0) {
1962     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
1963                            prefix, ns.sd(), ns.maximum());
1964   }
1965 }
1966 
1967 void G1ConcurrentMark::print_summary_info() {
1968   Log(gc, marking) log;
1969   if (!log.is_trace()) {
1970     return;
1971   }
1972 
1973   log.trace(" Concurrent marking:");
1974   print_ms_time_info("  ", "init marks", _init_times);
1975   print_ms_time_info("  ", "remarks", _remark_times);
1976   {
1977     print_ms_time_info("     ", "final marks", _remark_mark_times);
1978     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
1979 
1980   }
1981   print_ms_time_info("  ", "cleanups", _cleanup_times);
1982   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
1983             _total_cleanup_time, (_cleanup_times.num() > 0 ? _total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
1984   log.trace("  Total stop_world time = %8.2f s.",
1985             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
1986   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
1987             cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
1988 }
1989 
1990 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
1991   _concurrent_workers->print_worker_threads_on(st);
1992 }
1993 
1994 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
1995   _concurrent_workers->threads_do(tc);
1996 }
1997 
1998 void G1ConcurrentMark::print_on_error(outputStream* st) const {
1999   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2000                p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
2001   _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
2002   _next_mark_bitmap->print_on_error(st, " Next Bits: ");
2003 }
2004 
2005 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2006   ReferenceProcessor* result = g1h->ref_processor_cm();
2007   assert(result != NULL, "CM reference processor should not be NULL");
2008   return result;
2009 }
2010 
2011 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2012                                G1CMTask* task)
2013   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2014     _g1h(g1h), _task(task)
2015 { }
2016 
2017 void G1CMTask::setup_for_region(HeapRegion* hr) {
2018   assert(hr != NULL,
2019         "claim_region() should have filtered out NULL regions");
2020   _curr_region  = hr;
2021   _finger       = hr->bottom();
2022   update_region_limit();
2023 }
2024 
2025 void G1CMTask::update_region_limit() {
2026   HeapRegion* hr            = _curr_region;
2027   HeapWord* bottom          = hr->bottom();
2028   HeapWord* limit           = hr->next_top_at_mark_start();
2029 
2030   if (limit == bottom) {
2031     // The region was collected underneath our feet.
2032     // We set the finger to bottom to ensure that the bitmap
2033     // iteration that will follow this will not do anything.
2034     // (this is not a condition that holds when we set the region up,
2035     // as the region is not supposed to be empty in the first place)
2036     _finger = bottom;
2037   } else if (limit >= _region_limit) {
2038     assert(limit >= _finger, "peace of mind");
2039   } else {
2040     assert(limit < _region_limit, "only way to get here");
2041     // This can happen under some pretty unusual circumstances.  An
2042     // evacuation pause empties the region underneath our feet (NTAMS
2043     // at bottom). We then do some allocation in the region (NTAMS
2044     // stays at bottom), followed by the region being used as a GC
2045     // alloc region (NTAMS will move to top() and the objects
2046     // originally below it will be grayed). All objects now marked in
2047     // the region are explicitly grayed, if below the global finger,
2048     // and we do not need in fact to scan anything else. So, we simply
2049     // set _finger to be limit to ensure that the bitmap iteration
2050     // doesn't do anything.
2051     _finger = limit;
2052   }
2053 
2054   _region_limit = limit;
2055 }
2056 
2057 void G1CMTask::giveup_current_region() {
2058   assert(_curr_region != NULL, "invariant");
2059   clear_region_fields();
2060 }
2061 
2062 void G1CMTask::clear_region_fields() {
2063   // Set the three fields to values that indicate that we're not
2064   // holding on to a region.
2065   _curr_region   = NULL;
2066   _finger        = NULL;
2067   _region_limit  = NULL;
2068 }
2069 
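     // Installs or clears the oop closure used while scanning objects. The
     // asserts enforce strict alternation: a closure may only be installed
     // when none is set, and it must be explicitly cleared (set to NULL)
     // once do_marking_step() is done with it.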
2070 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2071   if (cm_oop_closure == NULL) {
2072     assert(_cm_oop_closure != NULL, "invariant");
2073   } else {
2074     assert(_cm_oop_closure == NULL, "invariant");
2075   }
2076   _cm_oop_closure = cm_oop_closure;
2077 }
2078 
2079 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2080   guarantee(next_mark_bitmap != NULL, "invariant");
2081   _next_mark_bitmap              = next_mark_bitmap;
2082   clear_region_fields();
2083 
2084   _calls                         = 0;
2085   _elapsed_time_ms               = 0.0;
2086   _termination_time_ms           = 0.0;
2087   _termination_start_time_ms     = 0.0;
2088 
2089   _mark_stats_cache.reset();
2090 }
2091 
2092 bool G1CMTask::should_exit_termination() {
2093   regular_clock_call();
2094   // This is called when we are in the termination protocol. We should
2095   // quit if, for some reason, this task wants to abort or the global
2096   // stack is not empty (this means that we can get work from it).
2097   return !_cm->mark_stack_empty() || has_aborted();
2098 }
2099 
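     // Called by the work-based scheme once the words-scanned or refs-reached
     // limit has been hit; it simply triggers the regular clock call.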
2100 void G1CMTask::reached_limit() {
2101   assert(_words_scanned >= _words_scanned_limit ||
2102          _refs_reached >= _refs_reached_limit ,
2103          "shouldn't have been called otherwise");
2104   regular_clock_call();
2105 }
2106 
2107 void G1CMTask::regular_clock_call() {
2108   if (has_aborted()) {
2109     return;
2110   }
2111 
2112   // First, we need to recalculate the words scanned and refs reached
2113   // limits for the next clock call.
2114   recalculate_limits();
2115 
2116   // During the regular clock call we do the following
2117 
2118   // (1) If an overflow has been flagged, then we abort.
2119   if (_cm->has_overflown()) {
2120     set_has_aborted();
2121     return;
2122   }
2123 
2124   // If we are not concurrent (i.e. we're doing remark) we don't need
2125   // to check anything else. The other steps are only needed during
2126   // the concurrent marking phase.
2127   if (!_concurrent) {
2128     return;
2129   }
2130 
2131   // (2) If marking has been aborted for Full GC, then we also abort.
2132   if (_cm->has_aborted()) {
2133     set_has_aborted();
2134     return;
2135   }
2136 
2137   double curr_time_ms = os::elapsedVTime() * 1000.0;
2138 
2139   // (3) We check whether we should yield. If we have to, then we abort.
2140   if (SuspendibleThreadSet::should_yield()) {
2141     // We should yield. To do this we abort the task. The caller is
2142     // responsible for yielding.
2143     set_has_aborted();
2144     return;
2145   }
2146 
2147   // (4) We check whether we've reached our time quota. If we have,
2148   // then we abort.
2149   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2150   if (elapsed_time_ms > _time_target_ms) {
2151     set_has_aborted();
2152     _has_timed_out = true;
2153     return;
2154   }
2155 
2156   // (5) Finally, we check whether there are enough completed SATB
2157   // buffers available for processing. If there are, we abort.
2158   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2159   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2160     // We do need to process SATB buffers; we'll abort and restart
2161     // the marking task to do so.
2162     set_has_aborted();
2163     return;
2164   }
2165 }
2166 
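     // Reset both limits to one full period beyond the current counters.
     // decrease_limits() below may later pull the limits in so that the next
     // clock call happens earlier.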
2167 void G1CMTask::recalculate_limits() {
2168   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2169   _words_scanned_limit      = _real_words_scanned_limit;
2170 
2171   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2172   _refs_reached_limit       = _real_refs_reached_limit;
2173 }
2174 
2175 void G1CMTask::decrease_limits() {
2176   // This is called when we believe that we're going to do an infrequent
2177   // operation which will increase the per byte scanned cost (i.e. move
2178   // entries to/from the global stack). It basically tries to decrease the
2179   // scanning limit so that the clock is called earlier.
2180 
2181   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2182   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2183 }
2184 
2185 void G1CMTask::move_entries_to_global_stack() {
2186   // Local array where we'll store the entries that will be popped
2187   // from the local queue.
2188   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2189 
2190   size_t n = 0;
2191   G1TaskQueueEntry task_entry;
2192   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2193     buffer[n] = task_entry;
2194     ++n;
2195   }
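       // If the chunk is not completely full, terminate it with an empty entry
       // so that consumers know where the valid entries end.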
2196   if (n < G1CMMarkStack::EntriesPerChunk) {
2197     buffer[n] = G1TaskQueueEntry();
2198   }
2199 
2200   if (n > 0) {
2201     if (!_cm->mark_stack_push(buffer)) {
2202       set_has_aborted();
2203     }
2204   }
2205 
2206   // This operation was quite expensive, so decrease the limits.
2207   decrease_limits();
2208 }
2209 
2210 bool G1CMTask::get_entries_from_global_stack() {
2211   // Local array where we'll store the entries that will be popped
2212   // from the global stack.
2213   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2214 
2215   if (!_cm->mark_stack_pop(buffer)) {
2216     return false;
2217   }
2218 
2219   // We did actually pop at least one entry.
2220   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2221     G1TaskQueueEntry task_entry = buffer[i];
2222     if (task_entry.is_null()) {
2223       break;
2224     }
2225     assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2226     bool success = _task_queue->push(task_entry);
2227     // We only call this when the local queue is empty or under a
2228     // given target limit. So, we do not expect this push to fail.
2229     assert(success, "invariant");
2230   }
2231 
2232   // This operation was quite expensive, so decrease the limits
2233   decrease_limits();
2234   return true;
2235 }
2236 
2237 void G1CMTask::drain_local_queue(bool partially) {
2238   if (has_aborted()) {
2239     return;
2240   }
2241 
2242   // Decide what the target size is, depending on whether we're going to
2243   // drain it partially (so that other tasks can steal if they run out
2244   // of things to do) or totally (at the very end).
2245   size_t target_size;
2246   if (partially) {
2247     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2248   } else {
2249     target_size = 0;
2250   }
2251 
2252   if (_task_queue->size() > target_size) {
2253     G1TaskQueueEntry entry;
2254     bool ret = _task_queue->pop_local(entry);
2255     while (ret) {
2256       scan_task_entry(entry);
2257       if (_task_queue->size() <= target_size || has_aborted()) {
2258         ret = false;
2259       } else {
2260         ret = _task_queue->pop_local(entry);
2261       }
2262     }
2263   }
2264 }
2265 
2266 void G1CMTask::drain_global_stack(bool partially) {
2267   if (has_aborted()) {
2268     return;
2269   }
2270 
2271   // We have a policy to drain the local queue before we attempt to
2272   // drain the global stack.
2273   assert(partially || _task_queue->size() == 0, "invariant");
2274 
2275   // Decide what the target size is, depending on whether we're going to
2276   // drain it partially (so that other tasks can steal if they run out
2277   // of things to do) or totally (at the very end).
2278   // Notice that when draining the global mark stack partially, due to the raciness
2279   // of the mark stack size update we might in fact drop below the target. But,
2280   // this is not a problem.
2281   // In case of total draining, we simply process until the global mark stack is
2282   // totally empty, disregarding the size counter.
2283   if (partially) {
2284     size_t const target_size = _cm->partial_mark_stack_size_target();
2285     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2286       if (get_entries_from_global_stack()) {
2287         drain_local_queue(partially);
2288       }
2289     }
2290   } else {
2291     while (!has_aborted() && get_entries_from_global_stack()) {
2292       drain_local_queue(partially);
2293     }
2294   }
2295 }
2296 
2297 // The SATB queue code makes several assumptions about whether to call the par or
2298 // non-par versions of the methods. This is why some of the code is
2299 // replicated. We should really get rid of the single-threaded version
2300 // of the code to simplify things.
2301 void G1CMTask::drain_satb_buffers() {
2302   if (has_aborted()) {
2303     return;
2304   }
2305 
2306   // We set this so that the regular clock knows that we're in the
2307   // middle of draining buffers and doesn't set the abort flag when it
2308   // notices that SATB buffers are available for draining. It'd be
2309   // very counterproductive if it did that. :-)
2310   _draining_satb_buffers = true;
2311 
2312   G1CMSATBBufferClosure satb_cl(this, _g1h);
2313   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2314 
2315   // This keeps claiming and applying the closure to completed buffers
2316   // until we run out of buffers or we need to abort.
2317   while (!has_aborted() &&
2318          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2319     regular_clock_call();
2320   }
2321 
2322   _draining_satb_buffers = false;
2323 
2324   assert(has_aborted() ||
2325          _concurrent ||
2326          satb_mq_set.completed_buffers_num() == 0, "invariant");
2327 
2328   // Again, this was a potentially expensive operation; decrease the
2329   // limits to get the regular clock call early.
2330   decrease_limits();
2331 }
2332 
2333 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2334   _mark_stats_cache.reset(region_idx);
2335 }
2336 
2337 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2338   return _mark_stats_cache.evict_all();
2339 }
2340 
2341 void G1CMTask::print_stats() {
2342   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2343   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2344                        _elapsed_time_ms, _termination_time_ms);
2345   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2346                        _step_times_ms.num(),
2347                        _step_times_ms.avg(),
2348                        _step_times_ms.sd(),
2349                        _step_times_ms.maximum(),
2350                        _step_times_ms.sum());
2351   size_t const hits = _mark_stats_cache.hits();
2352   size_t const misses = _mark_stats_cache.misses();
2353   log_debug(gc, stats)("  Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f",
2354                        hits, misses, percent_of(hits, hits + misses));
2355 }
2356 
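     // Attempt to steal an entry from another task's queue; returns true and
     // stores the stolen entry in task_entry on success.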
2357 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2358   return _task_queues->steal(worker_id, hash_seed, task_entry);
2359 }
2360 
2361 /*****************************************************************************
2362 
2363     The do_marking_step(time_target_ms, ...) method is the building
2364     block of the parallel marking framework. It can be called in parallel
2365     with other invocations of do_marking_step() on different tasks
2366     (but only one per task, obviously) and concurrently with the
2367     mutator threads, or during remark, hence it eliminates the need
2368     for two versions of the code. When called during remark, it will
2369     pick up from where the task left off during the concurrent marking
2370     phase. Interestingly, tasks are also claimable during evacuation
2371     pauses, since do_marking_step() ensures that it aborts before
2372     it needs to yield.
2373 
2374     The data structures that it uses to do marking work are the
2375     following:
2376 
2377       (1) Marking Bitmap. If there are gray objects that appear only
2378       on the bitmap (this happens either when dealing with an overflow
2379       or when the initial marking phase has simply marked the roots
2380       and didn't push them on the stack), then tasks claim heap
2381       regions whose bitmap they then scan to find gray objects. A
2382       global finger indicates where the end of the last claimed region
2383       is. A local finger indicates how far into the region a task has
2384       scanned. The two fingers are used to determine how to gray an
2385       object (i.e. whether simply marking it is OK, as it will be
2386       visited by a task in the future, or whether it needs to be also
2387       pushed on a stack).
2388 
2389       (2) Local Queue. The local queue of the task which is accessed
2390       reasonably efficiently by the task. Other tasks can steal from
2391       it when they run out of work. Throughout the marking phase, a
2392       task attempts to keep its local queue short but not totally
2393       empty, so that entries are available for stealing by other
2394       tasks. Only when there is no more work, a task will totally
2395       drain its local queue.
2396 
2397       (3) Global Mark Stack. This handles local queue overflow. During
2398       marking, only sets of entries are moved between it and the local
2399       queues, as access to it requires a mutex and more fine-grained
2400       interaction with it might cause contention. If it
2401       overflows, then the marking phase should restart and iterate
2402       over the bitmap to identify gray objects. Throughout the marking
2403       phase, tasks attempt to keep the global mark stack at a small
2404       length but not totally empty, so that entries are available for
2405       popping by other tasks. Only when there is no more work, tasks
2406       will totally drain the global mark stack.
2407 
2408       (4) SATB Buffer Queue. This is where completed SATB buffers are
2409       made available. Buffers are regularly removed from this queue
2410       and scanned for roots, so that the queue doesn't get too
2411       long. During remark, all completed buffers are processed, as
2412       well as the filled-in parts of any uncompleted buffers.
2413 
2414     The do_marking_step() method tries to abort when the time target
2415     has been reached. There are a few other cases when the
2416     do_marking_step() method also aborts:
2417 
2418       (1) When the marking phase has been aborted (after a Full GC).
2419 
2420       (2) When a global overflow (on the global stack) has been
2421       triggered. Before the task aborts, it will actually sync up with
2422       the other tasks to ensure that all the marking data structures
2423       (local queues, stacks, fingers etc.)  are re-initialized so that
2424       when do_marking_step() completes, the marking phase can
2425       immediately restart.
2426 
2427       (3) When enough completed SATB buffers are available. The
2428       do_marking_step() method only tries to drain SATB buffers right
2429       at the beginning. So, if enough buffers are available, the
2430       marking step aborts and the SATB buffers are processed at
2431       the beginning of the next invocation.
2432 
2433       (4) To yield. When we have to yield, we abort and yield
2434       right at the end of do_marking_step(). This saves us from a lot
2435       of hassle as, by yielding we might allow a Full GC. If this
2436       happens then objects will be compacted underneath our feet, the
2437       heap might shrink, etc. We save checking for this by just
2438       aborting and doing the yield right at the end.
2439 
2440     From the above it follows that the do_marking_step() method should
2441     be called in a loop (or, otherwise, regularly) until it completes.
2442 
2443     If a marking step completes without its has_aborted() flag being
2444     true, it means it has completed the current marking phase (and
2445     also all other marking tasks have done so and have all synced up).
2446 
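         For illustration, the callers in this file (for instance the remark
         task and the reference processing drain closure above) drive it with
         a loop of roughly this shape, where target_ms stands for whichever
         time target the caller chose:

           do {
             task->do_marking_step(target_ms, do_termination, is_serial);
           } while (task->has_aborted() && !cm->has_overflown());
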
2447     A method called regular_clock_call() is invoked "regularly" (in
2448     sub ms intervals) throughout marking. It is this clock method that
2449     checks all the abort conditions which were mentioned above and
2450     decides when the task should abort. A work-based scheme is used to
2451     trigger this clock method: when the number of object words the
2452     marking phase has scanned or the number of references the marking
2453     phase has visited reach a given limit. Additional invocations of
2454     the clock method have been planted in a few other strategic places
2455     too. The initial reason for the clock method was to avoid calling
2456     vtime too regularly, as it is quite expensive. So, once it was in
2457     place, it was natural to piggy-back all the other conditions on it
2458     too and not constantly check them throughout the code.
2459 
2460     If do_termination is true then do_marking_step will enter its
2461     termination protocol.
2462 
2463     The value of is_serial must be true when do_marking_step is being
2464     called serially (i.e. by the VMThread) and do_marking_step should
2465     skip any synchronization in the termination and overflow code.
2466     Examples include the serial remark code and the serial reference
2467     processing closures.
2468 
2469     The value of is_serial must be false when do_marking_step is
2470     being called by any of the worker threads in a work gang.
2471     Examples include the concurrent marking code (CMMarkingTask),
2472     the MT remark code, and the MT reference processing closures.
2473 
2474  *****************************************************************************/
2475 
2476 void G1CMTask::do_marking_step(double time_target_ms,
2477                                bool do_termination,
2478                                bool is_serial) {
2479   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2480   assert(_concurrent == _cm->concurrent(), "they should be the same");
2481 
2482   _start_time_ms = os::elapsedVTime() * 1000.0;
2483 
2484   // If do_stealing is true then do_marking_step will attempt to
2485   // steal work from the other G1CMTasks. It only makes sense to
2486   // enable stealing when the termination protocol is enabled
2487   // and do_marking_step() is not being called serially.
2488   bool do_stealing = do_termination && !is_serial;
2489 
2490   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2491   _time_target_ms = time_target_ms - diff_prediction_ms;
2492 
2493   // set up the variables that are used in the work-based scheme to
2494   // call the regular clock method
2495   _words_scanned = 0;
2496   _refs_reached  = 0;
2497   recalculate_limits();
2498 
2499   // clear all flags
2500   clear_has_aborted();
2501   _has_timed_out = false;
2502   _draining_satb_buffers = false;
2503 
2504   ++_calls;
2505 
2506   // Set up the bitmap and oop closures. Anything that uses them is
2507   // eventually called from this method, so it is OK to allocate these
2508   // on the stack in this frame.
2509   G1CMBitMapClosure bitmap_closure(this, _cm);
2510   G1CMOopClosure cm_oop_closure(_g1h, this);
2511   set_cm_oop_closure(&cm_oop_closure);
2512 
2513   if (_cm->has_overflown()) {
2514     // This can happen if the mark stack overflows during a GC pause
2515     // and this task, after a yield point, restarts. We have to abort
2516     // as we need to get into the overflow protocol which happens
2517     // right at the end of this task.
2518     set_has_aborted();
2519   }
2520 
2521   // First drain any available SATB buffers. After this, we will not
2522   // look at SATB buffers before the next invocation of this method.
2523   // If enough completed SATB buffers are queued up, the regular clock
2524   // will abort this task so that it restarts.
2525   drain_satb_buffers();
2526   // ...then partially drain the local queue and the global stack
2527   drain_local_queue(true);
2528   drain_global_stack(true);
2529 
2530   do {
2531     if (!has_aborted() && _curr_region != NULL) {
2532       // This means that we're already holding on to a region.
2533       assert(_finger != NULL, "if region is not NULL, then the finger "
2534              "should not be NULL either");
2535 
2536       // We might have restarted this task after an evacuation pause
2537       // which might have evacuated the region we're holding on to
2538       // underneath our feet. Let's read its limit again to make sure
2539       // that we do not iterate over a region of the heap that
2540       // contains garbage (update_region_limit() will also move
2541       // _finger to the start of the region if it is found empty).
2542       update_region_limit();
2543       // We will start from _finger not from the start of the region,
2544       // as we might be restarting this task after aborting half-way
2545       // through scanning this region. In this case, _finger points to
2546       // the address where we last found a marked object. If this is a
2547       // fresh region, _finger points to start().
2548       MemRegion mr = MemRegion(_finger, _region_limit);
2549 
2550       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2551              "humongous regions should go around loop once only");
2552 
2553       // Some special cases:
2554       // If the memory region is empty, we can just give up the region.
2555       // If the current region is humongous then we only need to check
2556       // the bitmap for the bit associated with the start of the object,
2557       // scan the object if it's live, and give up the region.
2558       // Otherwise, let's iterate over the bitmap of the part of the region
2559       // that is left.
2560       // If the iteration is successful, give up the region.
2561       if (mr.is_empty()) {
2562         giveup_current_region();
2563         regular_clock_call();
2564       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2565         if (_next_mark_bitmap->is_marked(mr.start())) {
2566           // The object is marked - apply the closure
2567           bitmap_closure.do_addr(mr.start());
2568         }
2569         // Even if this task aborted while scanning the humongous object
2570         // we can (and should) give up the current region.
2571         giveup_current_region();
2572         regular_clock_call();
2573       } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
2574         giveup_current_region();
2575         regular_clock_call();
2576       } else {
2577         assert(has_aborted(), "currently the only way to do so");
2578         // The only way to abort the bitmap iteration is to return
2579         // false from the do_addr() method. However, inside the
2580         // do_addr() method we move the _finger to point to the
2581         // object currently being looked at. So, if we bail out, we
2582         // have definitely set _finger to something non-null.
2583         assert(_finger != NULL, "invariant");
2584 
2585         // Region iteration was actually aborted. So now _finger
2586         // points to the address of the object we last scanned. If we
2587         // leave it there, when we restart this task, we will rescan
2588         // the object. It is easy to avoid this. We move the finger by
2589         // enough to point to the next possible object header.
2590         assert(_finger < _region_limit, "invariant");
2591         HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2592         // Check if bitmap iteration was aborted while scanning the last object
2593         if (new_finger >= _region_limit) {
2594           giveup_current_region();
2595         } else {
2596           move_finger_to(new_finger);
2597         }
2598       }
2599     }
2600     // At this point we have either completed iterating over the
2601     // region we were holding on to, or we have aborted.
2602 
2603     // We then partially drain the local queue and the global stack.
2604     // (Do we really need this?)
2605     drain_local_queue(true);
2606     drain_global_stack(true);
2607 
2608     // Read the note on the claim_region() method on why it might
2609     // return NULL with potentially more regions available for
2610     // claiming and why we have to check out_of_regions() to determine
2611     // whether we're done or not.
2612     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2613       // We are going to try to claim a new region. We should have
2614       // given up on the previous one.
2615       // Separated the asserts so that we know which one fires.
2616       assert(_curr_region  == NULL, "invariant");
2617       assert(_finger       == NULL, "invariant");
2618       assert(_region_limit == NULL, "invariant");
2619       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2620       if (claimed_region != NULL) {
2621         // Yes, we managed to claim one
2622         setup_for_region(claimed_region);
2623         assert(_curr_region == claimed_region, "invariant");
2624       }
2625       // It is important to call the regular clock here. It might take
2626       // a while to claim a region if, for example, we hit a large
2627       // block of empty regions. So we need to call the regular clock
2628       // method once round the loop to make sure it's called
2629       // frequently enough.
2630       regular_clock_call();
2631     }
2632 
2633     if (!has_aborted() && _curr_region == NULL) {
2634       assert(_cm->out_of_regions(),
2635              "at this point we should be out of regions");
2636     }
2637   } while ( _curr_region != NULL && !has_aborted());
2638 
2639   if (!has_aborted()) {
2640     // We cannot check whether the global stack is empty, since other
2641     // tasks might be pushing objects to it concurrently.
2642     assert(_cm->out_of_regions(),
2643            "at this point we should be out of regions");
2644     // Try to reduce the number of available SATB buffers so that
2645     // remark has less work to do.
2646     drain_satb_buffers();
2647   }
2648 
2649   // Since we've done everything else, we can now totally drain the
2650   // local queue and global stack.
2651   drain_local_queue(false);
2652   drain_global_stack(false);
2653 
2654   // Attempt at work stealing from other tasks' queues.
2655   if (do_stealing && !has_aborted()) {
2656     // We have not aborted. This means that we have finished all that
2657     // we could. Let's try to do some stealing...
2658 
2659     // We cannot check whether the global stack is empty, since other
2660     // tasks might be pushing objects to it concurrently.
2661     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2662            "only way to reach here");
2663     while (!has_aborted()) {
2664       G1TaskQueueEntry entry;
2665       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2666         scan_task_entry(entry);
2667 
2668         // And since we're towards the end, let's totally drain the
2669         // local queue and global stack.
2670         drain_local_queue(false);
2671         drain_global_stack(false);
2672       } else {
2673         break;
2674       }
2675     }
2676   }
2677 
2678   // We still haven't aborted. Now, let's try to get into the
2679   // termination protocol.
2680   if (do_termination && !has_aborted()) {
2681     // We cannot check whether the global stack is empty, since other
2682     // tasks might be concurrently pushing objects on it.
2683     // Separated the asserts so that we know which one fires.
2684     assert(_cm->out_of_regions(), "only way to reach here");
2685     assert(_task_queue->size() == 0, "only way to reach here");
2686     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2687 
2688     // The G1CMTask class also extends the TerminatorTerminator class,
2689     // hence its should_exit_termination() method will also decide
2690     // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // Let's allow task 0 to do this
        if (_concurrent) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // We need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task; it
      // will be restarted and we can hopefully find more things to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures.
      }

      clear_region_fields();
      flush_mark_stats_cache();

      if (!is_serial) {
        // If we're executing the concurrent phase of marking, reset the marking
        // state; otherwise the marking state is reset after reference processing,
        // during the remark pause.
        // If we reset here as a result of an overflow during the remark we will
        // see assertion failures from any subsequent set_concurrency_and_phase()
        // calls.
        if (_cm->concurrent() && _worker_id == 0) {
          // Worker 0 is responsible for clearing the global data structures because
          // of an overflow. During STW we should not clear the overflow flag (in
          // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
          // this method to abort the pause and restart concurrent marking.
          _cm->reset_marking_for_restart();

          log_info(gc, marking)("Concurrent Mark reset for overflow");
        }

        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }
}

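// Per-worker marking task state. The constructor only wires up the worker id,
// the task queue and the per-region mark statistics cache; the remaining
// fields start out cleared here and are set up later, when the task is
// prepared for a marking step.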
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats,
                   uint max_regions) :
  _objArray_processor(this),
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _next_mark_bitmap(NULL),
  _task_queue(task_queue),
  _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_time_ms(0.0),
  _cm_oop_closure(NULL),
  _curr_region(NULL),
  _finger(NULL),
  _region_limit(NULL),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _hash_seed(17),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _termination_start_time_ms(0.0),
  _concurrent(false),
  _marking_step_diffs_ms()
{
  guarantee(task_queue != NULL, "invariant");

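  // Seed the step time difference statistics so the very first prediction
  // does not work from an empty sample set (0.5ms appears to be just an
  // arbitrary, conservative starting sample).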
  _marking_step_diffs_ms.add(0.5);
}

// These formatting macros are used below to ensure consistent
// formatting. The *_H_* versions are used to format the header for a
// particular value and should be kept consistent with the
// corresponding value macro. Also note that most of the macros add
// the necessary white space (as a prefix), which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
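
// For example, a per-region line produced from the macros above has the
// (schematic) shape:
//   ### <type> <bottom>-<end>  <used>  <prev-live>  <next-live>  <gc-eff>  <remset>  <code-roots>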

G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0), _total_capacity_bytes(0),
  _total_prev_live_bytes(0), _total_next_live_bytes(0),
  _total_remset_bytes(0), _total_strong_code_roots_bytes(0)
{
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "(bytes)");
}

bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, strong_code_roots_bytes);

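  // Returning false keeps the region iteration going; a heap region closure
  // returning true would terminate the iteration early.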
  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add the static memory usages to the remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                         " SUMMARY"
                         G1PPRL_SUM_MB_FORMAT("capacity")
                         G1PPRL_SUM_MB_PERC_FORMAT("used")
                         G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                         G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                         G1PPRL_SUM_MB_FORMAT("remset")
                         G1PPRL_SUM_MB_FORMAT("code-roots"),
                         bytes_to_mb(_total_capacity_bytes),
                         bytes_to_mb(_total_used_bytes),
                         percent_of(_total_used_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_prev_live_bytes),
                         percent_of(_total_prev_live_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_next_live_bytes),
                         percent_of(_total_next_live_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_remset_bytes),
                         bytes_to_mb(_total_strong_code_roots_bytes));
}