/*
 * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/shared/adaptiveSizePolicy.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "gc/shared/weakProcessor.hpp"
#include "include/jvm.h"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/access.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
#include "utilities/growableArray.hpp"

bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();

  return true;
}

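// Returns the required alignment of the mark stack capacity, expressed in number of
// G1TaskQueueEntry elements: the smallest entry count whose backing size is a multiple
// of both the VM allocation granularity and the chunk size.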
size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}

bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}

void G1CMMarkStack::expand() {
  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                    old_capacity, new_capacity);
  }
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }
}

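// Pushes the given chunk onto the front of the given list. Callers are expected to
// hold the lock protecting that list (MarkStackFreeList_lock or MarkStackChunkList_lock).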
void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
  elem->next = *list;
  *list = elem;
}

void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
  TaskQueueEntryChunk* result = *list;
  if (result != NULL) {
    *list = (*list)->next;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != NULL) {
    _chunks_in_chunk_list--;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
  // wraparound of _hwm.
  if (_hwm >= _chunk_capacity) {
    return NULL;
  }

  size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
  if (cur_idx >= _chunk_capacity) {
    return NULL;
  }

  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
  result->next = NULL;
  return result;
}

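// Copies EntriesPerChunk entries from ptr_arr into a chunk (taken from the free list or
// newly allocated from the backing memory) and adds it to the chunk list. Returns false
// if no chunk could be obtained.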
bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == NULL) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

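// Removes a chunk from the chunk list, copies its EntriesPerChunk entries into ptr_arr
// and returns the chunk to the free list. Returns false if the mark stack is empty.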
bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}

G1CMRootRegions::G1CMRootRegions() :
  _survivors(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}

void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

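// Claims the next unscanned root region, or returns NULL if all root regions have been
// claimed or the scan should be aborted.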
HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) {
    return false;
  }

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

// Returns the maximum number of workers to be used in a concurrent
// phase based on the number of GC workers being used in a STW
// phase.
static uint scale_concurrent_worker_threads(uint num_gc_workers) {
  return MAX2((num_gc_workers + 2) / 4, 1U);
}

G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* prev_bitmap_storage,
                                   G1RegionToSpaceMapper* next_bitmap_storage) :
  // _cm_thread set inside the constructor
  _g1h(g1h),
  _completed_initialization(false),

  _mark_bitmap_1(),
  _mark_bitmap_2(),
  _prev_mark_bitmap(&_mark_bitmap_1),
  _next_mark_bitmap(&_mark_bitmap_2),

  _heap(_g1h->reserved_region()),

  _root_regions(),

  _global_mark_stack(),

  // _finger set in set_non_marking_state

  _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
  _max_num_tasks(ParallelGCThreads),
  // _num_active_tasks set in set_non_marking_state()
  // _tasks set inside the constructor

  _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
  _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),
  _total_cleanup_time(0.0),

  _accum_task_vtime(NULL),

  _concurrent_workers(NULL),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
{
  _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start ConcurrentMark thread.
  _cm_thread = new ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "CGC_lock must be initialized");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
    // Calculate the number of concurrent worker threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
  }

  assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _num_concurrent_workers = ConcGCThreads;
  _max_concurrent_workers = _num_concurrent_workers;

  _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
  _concurrent_workers->initialize_workers();

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());

    _accum_task_vtime[i] = 0.0;
  }

  reset_at_marking_complete();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  _has_aborted = false;

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use a different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(_next_mark_bitmap);
  }

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < max_regions; i++) {
    _top_at_rebuild_starts[i] = NULL;
    _region_mark_stats[i].clear();
  }
}

void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
  for (uint j = 0; j < _max_num_tasks; ++j) {
    _tasks[j]->clear_mark_stats_cache(region_idx);
  }
  _top_at_rebuild_starts[region_idx] = NULL;
  _region_mark_stats[region_idx].clear();
}

void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
  uint const region_idx = r->hrm_index();
  if (r->is_humongous()) {
    assert(r->is_starts_humongous(), "Got humongous continues region here");
    uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
    for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
      clear_statistics_in_region(j);
    }
  } else {
    clear_statistics_in_region(region_idx);
  }
}

void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
  assert_at_safepoint_on_vm_thread();

  // Need to clear mark bit of the humongous object.
  if (_next_mark_bitmap->is_marked(r->bottom())) {
    _next_mark_bitmap->clear(r->bottom());
  }

  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  clear_statistics(r);
}

void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    uint max_regions = _g1h->max_regions();
    for (uint i = 0; i < max_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  _finger = _heap.start();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap.end()));
  }
}

void G1ConcurrentMark::reset_at_marking_complete() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}

G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) {
    }

    virtual bool do_heap_region(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }

  bool is_complete() {
    return _cl.is_complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cm_thread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");

  clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);

  // Repeat the asserts from above.
  guarantee(cm_thread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert_at_safepoint_on_vm_thread();
  clear_bitmap(_prev_mark_bitmap, workers, false);
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
 public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool do_heap_region(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
    // that the result of the get_next_marked_addr() call is compared to the
    // limit value passed to it in order to detect any set bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
  }
};

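// Returns true if no mark bit is set in the next mark bitmap for any heap region.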
bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
  CheckBitmapClearHRClosure cl(_next_mark_bitmap);
  _g1h->heap_region_iterate(&cl);
  return cl.is_complete();
}

class NoteStartOfMarkHRClosure : public HeapRegionClosure {
public:
  bool do_heap_region(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::pre_initial_mark() {
  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  _g1h->heap_region_iterate(&startcl);
}


void G1ConcurrentMark::post_initial_mark() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for a Full GC or an evacuation pause to occur while it
 * is suspended. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class G1CMConcurrentMarkingTask : public AbstractGangTask {
  G1ConcurrentMark*     _cm;
public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");

      G1CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          task->do_marking_step(G1ConcMarkStepDurationMillis,
                                true  /* do_termination */,
                                false /* is_serial*/);

          _cm->do_yield_check();
        } while (!_cm->has_aborted() && task->has_aborted());
      }
      task->record_end_time();
      guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
      AbstractGangTask("Concurrent Mark"), _cm(cm) { }

  ~G1CMConcurrentMarkingTask() { }
};

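// Determines how many concurrent marking workers to activate for the current cycle,
// either all available workers or an adaptively calculated subset of them.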
uint G1ConcurrentMark::calc_active_marking_workers() {
  uint result = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    result = _max_concurrent_workers;
  } else {
    result =
      AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
                                                      1, /* Minimum workers */
                                                      _num_concurrent_workers,
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale the result down by scale_concurrent_worker_threads() because
    // that scaling has already gone into "_max_concurrent_workers".
  }
  assert(result > 0 && result <= _max_concurrent_workers,
         "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
         _max_concurrent_workers, result);
  return result;
}

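// Scans all objects in the given root region (currently always a survivor region),
// applying the root region scan closure to each object between bottom() and top().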
void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
  G1ConcurrentMark* _cm;
public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scan_root_region(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _num_concurrent_workers = MIN2(calc_active_marking_workers(),
                                   // We distribute work on a per-region basis, so starting
                                   // more threads than that is useless.
                                   root_regions()->num_root_regions());
    assert(_num_concurrent_workers <= _max_concurrent_workers,
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
    _concurrent_workers->run_task(&task, _num_concurrent_workers);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->collector_state()->set_clearing_next_bitmap(false);

  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow = false;

  _num_concurrent_workers = calc_active_marking_workers();

  uint active_workers = MAX2(1U, _num_concurrent_workers);

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}

void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc, phases) trace(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(type, vo, buffer);
  }

  verifier->check_bitmaps(caller);
}

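// Heap region closure that, based on the marking liveness information, selects the
// regions whose remembered sets should be rebuilt and records each region's
// top-at-rebuild-start value.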
class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;

  uint _num_regions_selected_for_rebuild;  // The number of regions actually selected for rebuild.

  void update_remset_before_rebuild(HeapRegion * hr) {
    G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker();

    size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize;
    bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
    if (selected_for_rebuild) {
      _num_regions_selected_for_rebuild++;
    }
    _cm->update_top_at_rebuild_start(hr);
  }

public:
  G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm) :
    _g1h(g1h), _cm(cm), _num_regions_selected_for_rebuild(0) { }

  virtual bool do_heap_region(HeapRegion* r) {
    update_remset_before_rebuild(r);
    return false;
  }

  uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
};

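// Heap region closure that finalizes the remembered set tracking state of each region
// once the rebuild phase has completed.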
class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
  G1CollectedHeap* _g1h;
public:
  G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  virtual bool do_heap_region(HeapRegion* r) {
    _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r);
    return false;
  }
};

void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* g1p = _g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");

  {
    GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work(false /* clear_all_soft_refs */);

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases)("Flush Task Caches");
      flush_all_task_caches();
    }

    {
      GCTraceTime(Debug, gc, phases)("Update Remembered Set Tracking Before Rebuild");
      G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this);
      _g1h->heap_region_iterate(&cl);
      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_regions(), cl.num_selected_for_rebuild());
    }

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UseNextMarking, "Remark after");

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    reset_at_marking_complete();
  } else {
    // We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  {
    GCTraceTime(Debug, gc, phases)("Report Object Count");
    report_object_count();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

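// Parallel task for the Cleanup pause: frees regions that contain no live objects and
// performs the per-region remembered set cleanup work for the rest.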
class G1CleanupTask : public AbstractGangTask {
  // Per-region work during the Cleanup pause.
  class G1CleanupRegionsClosure : public HeapRegionClosure {
    G1CollectedHeap* _g1h;
    size_t _freed_bytes;
    FreeRegionList* _local_cleanup_list;
    uint _old_regions_removed;
    uint _humongous_regions_removed;
    HRRSCleanupTask* _hrrs_cleanup_task;

  public:
    G1CleanupRegionsClosure(G1CollectedHeap* g1,
                            FreeRegionList* local_cleanup_list,
                            HRRSCleanupTask* hrrs_cleanup_task) :
      _g1h(g1),
      _freed_bytes(0),
      _local_cleanup_list(local_cleanup_list),
      _old_regions_removed(0),
      _humongous_regions_removed(0),
      _hrrs_cleanup_task(hrrs_cleanup_task) { }

    size_t freed_bytes() { return _freed_bytes; }
    const uint old_regions_removed() { return _old_regions_removed; }
    const uint humongous_regions_removed() { return _humongous_regions_removed; }

    bool do_heap_region(HeapRegion *hr) {
      hr->note_end_of_marking();

      if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
        _freed_bytes += hr->used();
        hr->set_containing_set(NULL);
        if (hr->is_humongous()) {
          _humongous_regions_removed++;
          _g1h->free_humongous_region(hr, _local_cleanup_list);
        } else {
          _old_regions_removed++;
          _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */);
        }
        hr->clear_cardtable();
        _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
        log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
      } else {
        hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
      }

      return false;
    }
  };

  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1CleanupTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
    AbstractGangTask("G1 Cleanup"),
    _g1h(g1h),
    _cleanup_list(cleanup_list),
    _hrclaimer(n_workers) {

    HeapRegionRemSet::reset_for_cleanup_tasks();
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1CleanupRegionsClosure cl(_g1h,
                               &local_cleanup_list,
                               &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
    assert(cl.is_complete(), "Shouldn't have aborted!");

    // Now update the old/humongous region sets
    _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(cl.freed_bytes());

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

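// Reclaims all regions found to be completely empty after marking and returns them
// to the free list.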
void G1ConcurrentMark::reclaim_empty_regions() {
  WorkGang* workers = _g1h->workers();
  FreeRegionList empty_regions_list("Empty Regions After Mark List");

  G1CleanupTask cl(_g1h, &empty_regions_list, workers->active_workers());
  workers->run_task(&cl);

  if (!empty_regions_list.is_empty()) {
    log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
    // Now print the empty regions list.
    G1HRPrinter* hrp = _g1h->hr_printer();
    if (hrp->is_active()) {
      FreeRegionListIterator iter(&empty_regions_list);
      while (iter.more_available()) {
        HeapRegion* hr = iter.get_next();
        hrp->cleanup(hr);
      }
    }
    // And actually make them available.
    _g1h->prepend_to_freelist(&empty_regions_list);
  }
}

void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* g1p = _g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UseNextMarking, "Cleanup before");

  {
    GCTraceTime(Debug, gc, phases)("Update Remembered Set Tracking After Rebuild");
    G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  }

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitmap as "prev".
  swap_mark_bitmaps();
  {
    GCTraceTime(Debug, gc, phases)("Reclaim Empty Regions");
    reclaim_empty_regions();
  }

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    GCTraceTime(Debug, gc, phases)("Purge Metaspace");
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->g1mm()->update_sizes();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for Cleanup to finish.
  _g1h->increment_total_collections();

  // Local statistics
  double recent_cleanup_time = (os::elapsedTime() - start);
  _total_cleanup_time += recent_cleanup_time;
  _cleanup_times.add(recent_cleanup_time);

  {
    GCTraceTime(Debug, gc, phases)("Finalize Concurrent Mark Cleanup");
    _g1h->g1_policy()->record_concurrent_mark_cleanup_end();
  }
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a worker thread (for serial reference
// processing the G1CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// state. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  int               _ref_counter_limit;
  int               _ref_counter;
  bool              _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      _task->deal_with_reference(p);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false      /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMtask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  bool              _is_serial;
 public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
1392       // closure to the entries on the discovered ref lists) or we overflow
1393       // the global marking stack.
1394       //
1395       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1396       // flag while there may still be some work to do. (See the comment at
1397       // the beginning of G1CMTask::do_marking_step() for those conditions -
1398       // one of which is reaching the specified time target.) It is only
1399       // when G1CMTask::do_marking_step() returns without setting the
1400       // has_aborted() flag that the marking step has completed.
1401 
1402       _task->do_marking_step(1000000000.0 /* something very large */,
1403                              true         /* do_termination */,
1404                              _is_serial);
1405     } while (_task->has_aborted() && !_cm->has_overflown());
1406   }
1407 };
1408 
1409 // Implementation of AbstractRefProcTaskExecutor for parallel
1410 // reference processing at the end of G1 concurrent marking
1411 
1412 class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
1413 private:
1414   G1CollectedHeap*  _g1h;
1415   G1ConcurrentMark* _cm;
1416   WorkGang*         _workers;
1417   uint              _active_workers;
1418 
1419 public:
1420   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1421                           G1ConcurrentMark* cm,
1422                           WorkGang* workers,
1423                           uint n_workers) :
1424     _g1h(g1h), _cm(cm),
1425     _workers(workers), _active_workers(n_workers) { }
1426 
1427   // Executes the given task using concurrent marking worker threads.
1428   virtual void execute(ProcessTask& task);
1429   virtual void execute(EnqueueTask& task);
1430 };
1431 
1432 class G1CMRefProcTaskProxy : public AbstractGangTask {
1433   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1434   ProcessTask&      _proc_task;
1435   G1CollectedHeap*  _g1h;
1436   G1ConcurrentMark* _cm;
1437 
1438 public:
1439   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1440                        G1CollectedHeap* g1h,
1441                        G1ConcurrentMark* cm) :
1442     AbstractGangTask("Process reference objects in parallel"),
1443     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1444     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1445     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1446   }
1447 
1448   virtual void work(uint worker_id) {
1449     ResourceMark rm;
1450     HandleMark hm;
1451     G1CMTask* task = _cm->task(worker_id);
1452     G1CMIsAliveClosure g1_is_alive(_g1h);
1453     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1454     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1455 
1456     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1457   }
1458 };
1459 
1460 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1461   assert(_workers != NULL, "Need parallel worker threads.");
1462   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1463 
1464   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1465 
1466   // We need to reset the concurrency level before each
1467   // proxy task execution, so that the termination protocol
1468   // and overflow handling in G1CMTask::do_marking_step() know
1469   // how many workers to wait for.
1470   _cm->set_concurrency(_active_workers);
1471   _workers->run_task(&proc_task_proxy);
1472 }
1473 
1474 class G1CMRefEnqueueTaskProxy : public AbstractGangTask {
1475   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1476   EnqueueTask& _enq_task;
1477 
1478 public:
1479   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1480     AbstractGangTask("Enqueue reference objects in parallel"),
1481     _enq_task(enq_task) { }
1482 
1483   virtual void work(uint worker_id) {
1484     _enq_task.work(worker_id);
1485   }
1486 };
1487 
1488 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1489   assert(_workers != NULL, "Need parallel worker threads.");
1490   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1491 
1492   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1493 
1494   // Not strictly necessary but...
1495   //
1496   // We need to reset the concurrency level before each
1497   // proxy task execution, so that the termination protocol
1498   // and overflow handling in G1CMTask::do_marking_step() know
1499   // how many workers to wait for.
1500   _cm->set_concurrency(_active_workers);
1501   _workers->run_task(&enq_task_proxy);
1502 }
1503 
1504 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
1505   ResourceMark rm;
1506   HandleMark   hm;
1507 
1508   // Is alive closure.
1509   G1CMIsAliveClosure g1_is_alive(_g1h);
1510 
1511   // Inner scope to exclude the cleaning of the string and symbol
1512   // tables from the displayed time.
1513   {
1514     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1515 
1516     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1517 
1518     // See the comment in G1CollectedHeap::ref_processing_init()
1519     // about how reference processing currently works in G1.
1520 
1521     // Set the soft reference policy
1522     rp->setup_policy(clear_all_soft_refs);
1523     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1524 
1525     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1526     // in serial reference processing. Note these closures are also
1527     // used for serially processing (by the current thread) the
1528     // JNI references during parallel reference processing.
1529     //
1530     // These closures do not need to synchronize with the worker
1531     // threads involved in parallel reference processing as these
1532     // instances are executed serially by the current thread (e.g.
1533     // reference processing is not multi-threaded and is thus
1534     // performed by the current thread instead of a gang worker).
1535     //
1536     // The gang tasks involved in parallel reference processing create
1537     // their own instances of these closures, which do their own
1538     // synchronization among themselves.
1539     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1540     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1541 
1542     // We need at least one active thread. If reference processing
1543     // is not multi-threaded we use the current (VMThread) thread,
1544     // otherwise we use the work gang from the G1CollectedHeap and
1545     // we utilize all the worker threads we can.
1546     bool processing_is_mt = rp->processing_is_mt();
1547     uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
1548     active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
1549 
1550     // Parallel processing task executor.
1551     G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
1552                                               _g1h->workers(), active_workers);
1553     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1554 
1555     // Set the concurrency level. The phase was already set prior to
1556     // executing the remark task.
1557     set_concurrency(active_workers);
1558 
1559     // Set the degree of MT processing here.  If the discovery was done MT,
1560     // the number of threads involved during discovery could differ from
1561     // the number of active workers.  This is OK as long as the discovered
1562     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1563     rp->set_active_mt_degree(active_workers);
1564 
1565     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1566 
1567     // Process the weak references.
1568     const ReferenceProcessorStats& stats =
1569         rp->process_discovered_references(&g1_is_alive,
1570                                           &g1_keep_alive,
1571                                           &g1_drain_mark_stack,
1572                                           executor,
1573                                           &pt);
1574     _gc_tracer_cm->report_gc_reference_stats(stats);
1575     pt.print_all_references();
1576 
1577     // The do_oop work routines of the keep_alive and drain_marking_stack
1578     // oop closures will set the has_overflown flag if we overflow the
1579     // global marking stack.
1580 
1581     assert(has_overflown() || _global_mark_stack.is_empty(),
1582            "Mark stack should be empty (unless it has overflown)");
1583 
1584     assert(rp->num_q() == active_workers, "why not");
1585 
1586     rp->enqueue_discovered_references(executor, &pt);
1587 
1588     rp->verify_no_references_recorded();
1589 
1590     pt.print_enqueue_phase();
1591 
1592     assert(!rp->discovery_enabled(), "Post condition");
1593   }
1594 
1595   assert(has_overflown() || _global_mark_stack.is_empty(),
1596          "Mark stack should be empty (unless it has overflown)");
1597 
1598   {
1599     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1600     WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
1601   }
1602 
1603   if (has_overflown()) {
1604     // We cannot trust g1_is_alive if the marking stack overflowed.
1605     return;
1606   }
1607 
1608   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1609 
1610   // Unload Klasses, String, Symbols, Code Cache, etc.
1611   if (ClassUnloadingWithConcurrentMark) {
1612     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1613     bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1614     _g1h->complete_cleaning(&g1_is_alive, purged_classes);
1615   } else {
1616     GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1617     // No need to clean the string table and symbol table as they are treated as strong roots when
1618     // class unloading is disabled.
1619     _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1620   }
1621 }
1622 
1623 void G1ConcurrentMark::report_object_count() {
1624   G1CMIsAliveClosure is_alive(_g1h);
1625   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1626 }
1627 
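     // Swap the roles of the two marking bitmaps: the now-complete next bitmap
     // becomes the prev bitmap, and the old prev bitmap becomes the next bitmap,
     // which must be cleared before the next marking cycle can use it (hence
     // set_clearing_next_bitmap()).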
1628 void G1ConcurrentMark::swap_mark_bitmaps() {
1629   G1CMBitMap* temp = _prev_mark_bitmap;
1630   _prev_mark_bitmap = _next_mark_bitmap;
1631   _next_mark_bitmap = temp;
1632   _g1h->collector_state()->set_clearing_next_bitmap(true);
1633 }
1634 
1635 // Closure for marking entries in SATB buffers.
1636 class G1CMSATBBufferClosure : public SATBBufferClosure {
1637 private:
1638   G1CMTask* _task;
1639   G1CollectedHeap* _g1h;
1640 
1641   // This is very similar to G1CMTask::deal_with_reference, but with
1642   // more relaxed requirements for the argument, so this must be more
1643   // circumspect about treating the argument as an object.
1644   void do_entry(void* entry) const {
1645     _task->increment_refs_reached();
1646     oop const obj = static_cast<oop>(entry);
1647     _task->make_reference_grey(obj);
1648   }
1649 
1650 public:
1651   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1652     : _task(task), _g1h(g1h) { }
1653 
1654   virtual void do_buffer(void** buffer, size_t size) {
1655     for (size_t i = 0; i < size; ++i) {
1656       do_entry(buffer[i]);
1657     }
1658   }
1659 };
1660 
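     // Closure applied to all threads during remark. Each Java thread that is
     // claimed has its nmethods scanned for roots and its SATB buffer drained;
     // the VM thread contributes the shared SATB queue.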
1661 class G1RemarkThreadsClosure : public ThreadClosure {
1662   G1CMSATBBufferClosure _cm_satb_cl;
1663   G1CMOopClosure _cm_cl;
1664   MarkingCodeBlobClosure _code_cl;
1665   int _thread_parity;
1666 
1667  public:
1668   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1669     _cm_satb_cl(task, g1h),
1670     _cm_cl(g1h, task),
1671     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1672     _thread_parity(Threads::thread_claim_parity()) {}
1673 
1674   void do_thread(Thread* thread) {
1675     if (thread->is_Java_thread()) {
1676       if (thread->claim_oops_do(true, _thread_parity)) {
1677         JavaThread* jt = (JavaThread*)thread;
1678 
1679         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1680         // however, the oops reachable from nmethods have very complex lifecycles:
1681         // * Alive if on the stack of an executing method
1682         // * Weakly reachable otherwise
1683         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1684         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1685         jt->nmethods_do(&_code_cl);
1686 
1687         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1688       }
1689     } else if (thread->is_VM_thread()) {
1690       if (thread->claim_oops_do(true, _thread_parity)) {
1691         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1692       }
1693     }
1694   }
1695 };
1696 
1697 class G1CMRemarkTask : public AbstractGangTask {
1698   G1ConcurrentMark* _cm;
1699 public:
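       // Process the claimed threads' root sets (SATB buffers and nmethods), then
       // call do_marking_step() repeatedly until all marking data structures are
       // drained or the global mark stack overflows.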
1700   void work(uint worker_id) {
1701     G1CMTask* task = _cm->task(worker_id);
1702     task->record_start_time();
1703     {
1704       ResourceMark rm;
1705       HandleMark hm;
1706 
1707       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1708       Threads::threads_do(&threads_f);
1709     }
1710 
1711     do {
1712       task->do_marking_step(1000000000.0 /* something very large */,
1713                             true         /* do_termination       */,
1714                             false        /* is_serial            */);
1715     } while (task->has_aborted() && !_cm->has_overflown());
1716     // If we overflow, then we do not want to restart. We instead
1717     // want to abort remark and do concurrent marking again.
1718     task->record_end_time();
1719   }
1720 
1721   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1722     AbstractGangTask("Par Remark"), _cm(cm) {
1723     _cm->terminator()->reset_for_reuse(active_workers);
1724   }
1725 };
1726 
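     // Finish marking in the STW remark pause: ensure the heap is parsable, flush
     // the per-thread SATB buffers and nmethod roots, and drain all marking data
     // structures using the active workers (via G1CMRemarkTask).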
1727 void G1ConcurrentMark::finalize_marking() {
1728   ResourceMark rm;
1729   HandleMark   hm;
1730 
1731   _g1h->ensure_parsability(false);
1732 
1733   // this is remark, so we'll use up all active threads
1734   uint active_workers = _g1h->workers()->active_workers();
1735   set_concurrency_and_phase(active_workers, false /* concurrent */);
1736   // Leave _parallel_marking_threads at its
1737   // value originally calculated in the G1ConcurrentMark
1738   // constructor and pass values of the active workers
1739   // through the gang in the task.
1740 
1741   {
1742     StrongRootsScope srs(active_workers);
1743 
1744     G1CMRemarkTask remarkTask(this, active_workers);
1745     // We will start all available threads, even if we decide that the
1746     // active_workers will be fewer. The extra ones will just bail out
1747     // immediately.
1748     _g1h->workers()->run_task(&remarkTask);
1749   }
1750 
1751   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1752   guarantee(has_overflown() ||
1753             satb_mq_set.completed_buffers_num() == 0,
1754             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1755             BOOL_TO_STR(has_overflown()),
1756             satb_mq_set.completed_buffers_num());
1757 
1758   print_stats();
1759 }
1760 
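     // Flush the per-task region mark statistics caches into the global mark stats
     // and log the aggregate cache hit/miss ratio.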
1761 void G1ConcurrentMark::flush_all_task_caches() {
1762   size_t hits = 0;
1763   size_t misses = 0;
1764   for (uint i = 0; i < _max_num_tasks; i++) {
1765     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1766     hits += stats.first;
1767     misses += stats.second;
1768   }
1769   size_t sum = hits + misses;
1770   log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
1771                        hits, misses, percent_of(hits, sum));
1772 }
1773 
1774 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1775   _prev_mark_bitmap->clear_range(mr);
1776 }
1777 
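     // Claim the next region to scan by atomically moving the global finger past it.
     // Returns NULL either when the finger has reached the end of the heap or when
     // the claimed region turns out to be empty (NTAMS at bottom); in the latter
     // case the caller should simply call claim_region() again.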
1778 HeapRegion*
1779 G1ConcurrentMark::claim_region(uint worker_id) {
1780   // "checkpoint" the finger
1781   HeapWord* finger = _finger;
1782 
1783   while (finger < _heap.end()) {
1784     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1785 
1786     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1787     // Make sure that the reads below do not float before loading curr_region.
1788     OrderAccess::loadload();
1789     // heap_region_containing() above may return NULL as we always scan and claim
1790     // regions up to the end of the heap. In this case, just jump to the next region.
1791     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1792 
1793     // Is the gap between reading the finger and doing the CAS too long?
1794     HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
1795     if (res == finger && curr_region != NULL) {
1796       // we succeeded
1797       HeapWord*   bottom        = curr_region->bottom();
1798       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1799 
1800       // Notice that _finger == end cannot be guaranteed here since
1801       // someone else might have moved the finger even further.
1802       assert(_finger >= end, "the finger should have moved forward");
1803 
1804       if (limit > bottom) {
1805         return curr_region;
1806       } else {
1807         assert(limit == bottom,
1808                "the region limit should be at bottom");
1809         // we return NULL and the caller should try calling
1810         // claim_region() again.
1811         return NULL;
1812       }
1813     } else {
1814       assert(_finger > finger, "the finger should have moved forward");
1815       // read it again
1816       finger = _finger;
1817     }
1818   }
1819 
1820   return NULL;
1821 }
1822 
1823 #ifndef PRODUCT
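     // Debug-only functor applied to each G1TaskQueueEntry to verify that it refers
     // to a valid oop (or an in-heap array slice) that is not in the collection set.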
1824 class VerifyNoCSetOops {
1825   G1CollectedHeap* _g1h;
1826   const char* _phase;
1827   int _info;
1828 
1829 public:
1830   VerifyNoCSetOops(const char* phase, int info = -1) :
1831     _g1h(G1CollectedHeap::heap()),
1832     _phase(phase),
1833     _info(info)
1834   { }
1835 
1836   void operator()(G1TaskQueueEntry task_entry) const {
1837     if (task_entry.is_array_slice()) {
1838       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1839       return;
1840     }
1841     guarantee(oopDesc::is_oop(task_entry.obj()),
1842               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1843               p2i(task_entry.obj()), _phase, _info);
1844     guarantee(!_g1h->is_in_cset(task_entry.obj()),
1845               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1846               p2i(task_entry.obj()), _phase, _info);
1847   }
1848 };
1849 
1850 void G1ConcurrentMark::verify_no_cset_oops() {
1851   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1852   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
1853     return;
1854   }
1855 
1856   // Verify entries on the global mark stack
1857   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1858 
1859   // Verify entries on the task queues
1860   for (uint i = 0; i < _max_num_tasks; ++i) {
1861     G1CMTaskQueue* queue = _task_queues->queue(i);
1862     queue->iterate(VerifyNoCSetOops("Queue", i));
1863   }
1864 
1865   // Verify the global finger
1866   HeapWord* global_finger = finger();
1867   if (global_finger != NULL && global_finger < _heap.end()) {
1868     // Since we always iterate over all regions, we might get a NULL HeapRegion
1869     // here.
1870     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1871     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1872               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1873               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1874   }
1875 
1876   // Verify the task fingers
1877   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1878   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1879     G1CMTask* task = _tasks[i];
1880     HeapWord* task_finger = task->finger();
1881     if (task_finger != NULL && task_finger < _heap.end()) {
1882       // See above note on the global finger verification.
1883       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
1884       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
1885                 !task_hr->in_collection_set(),
1886                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1887                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
1888     }
1889   }
1890 }
1891 #endif // PRODUCT
1892 
1893 void G1ConcurrentMark::rebuild_rem_set_concurrently() {
1894   _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset);
1895 }
1896 
1897 void G1ConcurrentMark::print_stats() {
1898   if (!log_is_enabled(Debug, gc, stats)) {
1899     return;
1900   }
1901   log_debug(gc, stats)("---------------------------------------------------------------------");
1902   for (size_t i = 0; i < _num_active_tasks; ++i) {
1903     _tasks[i]->print_stats();
1904     log_debug(gc, stats)("---------------------------------------------------------------------");
1905   }
1906 }
1907 
1908 void G1ConcurrentMark::concurrent_cycle_abort() {
1909   if (!cm_thread()->during_cycle() || _has_aborted) {
1910     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
1911     return;
1912   }
1913 
1914   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
1915   // concurrent bitmap clearing.
1916   {
1917     GCTraceTime(Debug, gc) debug("Clear Next Bitmap");
1918     clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
1919   }
1920   // Note we cannot clear the previous marking bitmap here
1921   // since VerifyDuringGC verifies the objects marked during
1922   // a full GC against the previous bitmap.
1923 
1924   // Empty mark stack
1925   reset_marking_for_restart();
1926   for (uint i = 0; i < _max_num_tasks; ++i) {
1927     _tasks[i]->clear_region_fields();
1928   }
1929   _first_overflow_barrier_sync.abort();
1930   _second_overflow_barrier_sync.abort();
1931   _has_aborted = true;
1932 
1933   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1934   satb_mq_set.abandon_partial_marking();
1935   // This can be called either during or outside marking, we'll read
1936   // the expected_active value from the SATB queue set.
1937   satb_mq_set.set_active_all_threads(
1938                                  false, /* new active value */
1939                                  satb_mq_set.is_active() /* expected_active */);
1940 }
1941 
1942 static void print_ms_time_info(const char* prefix, const char* name,
1943                                NumberSeq& ns) {
1944   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
1945                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
1946   if (ns.num() > 0) {
1947     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
1948                            prefix, ns.sd(), ns.maximum());
1949   }
1950 }
1951 
1952 void G1ConcurrentMark::print_summary_info() {
1953   Log(gc, marking) log;
1954   if (!log.is_trace()) {
1955     return;
1956   }
1957 
1958   log.trace(" Concurrent marking:");
1959   print_ms_time_info("  ", "init marks", _init_times);
1960   print_ms_time_info("  ", "remarks", _remark_times);
1961   {
1962     print_ms_time_info("     ", "final marks", _remark_mark_times);
1963     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
1964 
1965   }
1966   print_ms_time_info("  ", "cleanups", _cleanup_times);
1967   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
1968             _total_cleanup_time, (_cleanup_times.num() > 0 ? _total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
1969   log.trace("  Total stop_world time = %8.2f s.",
1970             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
1971   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
1972             cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
1973 }
1974 
1975 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
1976   _concurrent_workers->print_worker_threads_on(st);
1977 }
1978 
1979 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
1980   _concurrent_workers->threads_do(tc);
1981 }
1982 
1983 void G1ConcurrentMark::print_on_error(outputStream* st) const {
1984   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
1985                p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
1986   _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
1987   _next_mark_bitmap->print_on_error(st, " Next Bits: ");
1988 }
1989 
1990 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
1991   ReferenceProcessor* result = g1h->ref_processor_cm();
1992   assert(result != NULL, "CM reference processor should not be NULL");
1993   return result;
1994 }
1995 
1996 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
1997                                G1CMTask* task)
1998   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
1999     _g1h(g1h), _task(task)
2000 { }
2001 
2002 void G1CMTask::setup_for_region(HeapRegion* hr) {
2003   assert(hr != NULL,
2004         "claim_region() should have filtered out NULL regions");
2005   _curr_region  = hr;
2006   _finger       = hr->bottom();
2007   update_region_limit();
2008 }
2009 
2010 void G1CMTask::update_region_limit() {
2011   HeapRegion* hr            = _curr_region;
2012   HeapWord* bottom          = hr->bottom();
2013   HeapWord* limit           = hr->next_top_at_mark_start();
2014 
2015   if (limit == bottom) {
2016     // The region was collected underneath our feet.
2017     // We set the finger to bottom to ensure that the bitmap
2018     // iteration that will follow this will not do anything.
2019     // (this is not a condition that holds when we set the region up,
2020     // as the region is not supposed to be empty in the first place)
2021     _finger = bottom;
2022   } else if (limit >= _region_limit) {
2023     assert(limit >= _finger, "peace of mind");
2024   } else {
2025     assert(limit < _region_limit, "only way to get here");
2026     // This can happen under some pretty unusual circumstances.  An
2027     // evacuation pause empties the region underneath our feet (NTAMS
2028     // at bottom). We then do some allocation in the region (NTAMS
2029     // stays at bottom), followed by the region being used as a GC
2030     // alloc region (NTAMS will move to top() and the objects
2031     // originally below it will be grayed). All objects now marked in
2032     // the region are explicitly grayed, if below the global finger,
2033     // and in fact we do not need to scan anything else. So, we simply
2034     // set _finger to be limit to ensure that the bitmap iteration
2035     // doesn't do anything.
2036     _finger = limit;
2037   }
2038 
2039   _region_limit = limit;
2040 }
2041 
2042 void G1CMTask::giveup_current_region() {
2043   assert(_curr_region != NULL, "invariant");
2044   clear_region_fields();
2045 }
2046 
2047 void G1CMTask::clear_region_fields() {
2048   // Values for these three fields that indicate that we're not
2049   // holding on to a region.
2050   _curr_region   = NULL;
2051   _finger        = NULL;
2052   _region_limit  = NULL;
2053 }
2054 
2055 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2056   if (cm_oop_closure == NULL) {
2057     assert(_cm_oop_closure != NULL, "invariant");
2058   } else {
2059     assert(_cm_oop_closure == NULL, "invariant");
2060   }
2061   _cm_oop_closure = cm_oop_closure;
2062 }
2063 
2064 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2065   guarantee(next_mark_bitmap != NULL, "invariant");
2066   _next_mark_bitmap              = next_mark_bitmap;
2067   clear_region_fields();
2068 
2069   _calls                         = 0;
2070   _elapsed_time_ms               = 0.0;
2071   _termination_time_ms           = 0.0;
2072   _termination_start_time_ms     = 0.0;
2073 
2074   _mark_stats_cache.reset();
2075 }
2076 
2077 bool G1CMTask::should_exit_termination() {
2078   regular_clock_call();
2079   // This is called when we are in the termination protocol. We should
2080   // quit if, for some reason, this task wants to abort or the global
2081   // stack is not empty (this means that we can get work from it).
2082   return !_cm->mark_stack_empty() || has_aborted();
2083 }
2084 
2085 void G1CMTask::reached_limit() {
2086   assert(_words_scanned >= _words_scanned_limit ||
2087          _refs_reached >= _refs_reached_limit,
2088          "shouldn't have been called otherwise");
2089   regular_clock_call();
2090 }
2091 
2092 void G1CMTask::regular_clock_call() {
2093   if (has_aborted()) {
2094     return;
2095   }
2096 
2097   // First, we need to recalculate the words scanned and refs reached
2098   // limits for the next clock call.
2099   recalculate_limits();
2100 
2101   // During the regular clock call we do the following
2102 
2103   // (1) If an overflow has been flagged, then we abort.
2104   if (_cm->has_overflown()) {
2105     set_has_aborted();
2106     return;
2107   }
2108 
2109   // If we are not concurrent (i.e. we're doing remark) we don't need
2110   // to check anything else. The other steps are only needed during
2111   // the concurrent marking phase.
2112   if (!_cm->concurrent()) {
2113     return;
2114   }
2115 
2116   // (2) If marking has been aborted for Full GC, then we also abort.
2117   if (_cm->has_aborted()) {
2118     set_has_aborted();
2119     return;
2120   }
2121 
2122   double curr_time_ms = os::elapsedVTime() * 1000.0;
2123 
2124   // (3) We check whether we should yield. If we have to, then we abort.
2125   if (SuspendibleThreadSet::should_yield()) {
2126     // We should yield. To do this we abort the task. The caller is
2127     // responsible for yielding.
2128     set_has_aborted();
2129     return;
2130   }
2131 
2132   // (4) We check whether we've reached our time quota. If we have,
2133   // then we abort.
2134   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2135   if (elapsed_time_ms > _time_target_ms) {
2136     set_has_aborted();
2137     _has_timed_out = true;
2138     return;
2139   }
2140 
2141   // (5) Finally, we check whether there are enough completed SATB
2142   // buffers available for processing. If there are, we abort.
2143   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2144   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2145     // We need to process SATB buffers, so we'll abort and restart
2146     // the marking task to do so.
2147     set_has_aborted();
2148     return;
2149   }
2150 }
2151 
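     // Re-arm the work-based clock: the limits are set so that reached_limit() (and
     // hence regular_clock_call()) triggers after another words_scanned_period words
     // have been scanned or another refs_reached_period references have been visited.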
2152 void G1CMTask::recalculate_limits() {
2153   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2154   _words_scanned_limit      = _real_words_scanned_limit;
2155 
2156   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2157   _refs_reached_limit       = _real_refs_reached_limit;
2158 }
2159 
2160 void G1CMTask::decrease_limits() {
2161   // This is called when we believe that we're going to do an infrequent
2162   // operation which will increase the per byte scanned cost (i.e. move
2163   // entries to/from the global stack). It basically tries to decrease the
2164   // scanning limit so that the clock is called earlier.
2165 
2166   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2167   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2168 }
2169 
2170 void G1CMTask::move_entries_to_global_stack() {
2171   // Local array where we'll store the entries that will be popped
2172   // from the local queue.
2173   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2174 
2175   size_t n = 0;
2176   G1TaskQueueEntry task_entry;
2177   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2178     buffer[n] = task_entry;
2179     ++n;
2180   }
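       // If the chunk is not completely full, terminate it with an empty entry so
       // that the consumer (get_entries_from_global_stack()) knows where the valid
       // entries end.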
2181   if (n < G1CMMarkStack::EntriesPerChunk) {
2182     buffer[n] = G1TaskQueueEntry();
2183   }
2184 
2185   if (n > 0) {
2186     if (!_cm->mark_stack_push(buffer)) {
2187       set_has_aborted();
2188     }
2189   }
2190 
2191   // This operation was quite expensive, so decrease the limits.
2192   decrease_limits();
2193 }
2194 
2195 bool G1CMTask::get_entries_from_global_stack() {
2196   // Local array where we'll store the entries that will be popped
2197   // from the global stack.
2198   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2199 
2200   if (!_cm->mark_stack_pop(buffer)) {
2201     return false;
2202   }
2203 
2204   // We did actually pop at least one entry.
2205   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2206     G1TaskQueueEntry task_entry = buffer[i];
2207     if (task_entry.is_null()) {
2208       break;
2209     }
2210     assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2211     bool success = _task_queue->push(task_entry);
2212     // We only call this when the local queue is empty or under a
2213     // given target limit. So, we do not expect this push to fail.
2214     assert(success, "invariant");
2215   }
2216 
2217   // This operation was quite expensive, so decrease the limits
2218   decrease_limits();
2219   return true;
2220 }
2221 
2222 void G1CMTask::drain_local_queue(bool partially) {
2223   if (has_aborted()) {
2224     return;
2225   }
2226 
2227   // Decide what the target size is, depending on whether we're going to
2228   // drain it partially (so that other tasks can steal if they run out
2229   // of things to do) or totally (at the very end).
2230   size_t target_size;
2231   if (partially) {
2232     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2233   } else {
2234     target_size = 0;
2235   }
2236 
2237   if (_task_queue->size() > target_size) {
2238     G1TaskQueueEntry entry;
2239     bool ret = _task_queue->pop_local(entry);
2240     while (ret) {
2241       scan_task_entry(entry);
2242       if (_task_queue->size() <= target_size || has_aborted()) {
2243         ret = false;
2244       } else {
2245         ret = _task_queue->pop_local(entry);
2246       }
2247     }
2248   }
2249 }
2250 
2251 void G1CMTask::drain_global_stack(bool partially) {
2252   if (has_aborted()) {
2253     return;
2254   }
2255 
2256   // We have a policy to drain the local queue before we attempt to
2257   // drain the global stack.
2258   assert(partially || _task_queue->size() == 0, "invariant");
2259 
2260   // Decide what the target size is, depending on whether we're going to
2261   // drain it partially (so that other tasks can steal if they run out
2262   // of things to do) or totally (at the very end).
2263   // Notice that when draining the global mark stack partially, due to the raciness
2264   // of the mark stack size update we might in fact drop below the target. But,
2265   // this is not a problem.
2266   // In case of total draining, we simply process until the global mark stack is
2267   // totally empty, disregarding the size counter.
2268   if (partially) {
2269     size_t const target_size = _cm->partial_mark_stack_size_target();
2270     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2271       if (get_entries_from_global_stack()) {
2272         drain_local_queue(partially);
2273       }
2274     }
2275   } else {
2276     while (!has_aborted() && get_entries_from_global_stack()) {
2277       drain_local_queue(partially);
2278     }
2279   }
2280 }
2281 
2282 // The SATB queue code makes several assumptions about whether to call the par or
2283 // non-par versions of its methods. This is why some of the code is
2284 // replicated. We should really get rid of the single-threaded version
2285 // of the code to simplify things.
2286 void G1CMTask::drain_satb_buffers() {
2287   if (has_aborted()) {
2288     return;
2289   }
2290 
2291   // We set this so that the regular clock knows that we're in the
2292   // middle of draining buffers and doesn't set the abort flag when it
2293   // notices that SATB buffers are available for draining. It'd be
2294   // very counterproductive if it did that. :-)
2295   _draining_satb_buffers = true;
2296 
2297   G1CMSATBBufferClosure satb_cl(this, _g1h);
2298   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2299 
2300   // This keeps claiming and applying the closure to completed buffers
2301   // until we run out of buffers or we need to abort.
2302   while (!has_aborted() &&
2303          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2304     regular_clock_call();
2305   }
2306 
2307   _draining_satb_buffers = false;
2308 
2309   assert(has_aborted() ||
2310          _cm->concurrent() ||
2311          satb_mq_set.completed_buffers_num() == 0, "invariant");
2312 
2313   // Again, this was a potentially expensive operation, so decrease the
2314   // limits to get the regular clock call earlier.
2315   decrease_limits();
2316 }
2317 
2318 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2319   _mark_stats_cache.reset(region_idx);
2320 }
2321 
2322 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2323   return _mark_stats_cache.evict_all();
2324 }
2325 
2326 void G1CMTask::print_stats() {
2327   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2328   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2329                        _elapsed_time_ms, _termination_time_ms);
2330   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms, max = %1.2lfms, total = %1.2lfms",
2331                        _step_times_ms.num(),
2332                        _step_times_ms.avg(),
2333                        _step_times_ms.sd(),
2334                        _step_times_ms.maximum(),
2335                        _step_times_ms.sum());
2336   size_t const hits = _mark_stats_cache.hits();
2337   size_t const misses = _mark_stats_cache.misses();
2338   log_debug(gc, stats)("  Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f",
2339                        hits, misses, percent_of(hits, hits + misses));
2340 }
2341 
2342 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2343   return _task_queues->steal(worker_id, hash_seed, task_entry);
2344 }
2345 
2346 /*****************************************************************************
2347 
2348     The do_marking_step(time_target_ms, ...) method is the building
2349     block of the parallel marking framework. It can be called in parallel
2350     with other invocations of do_marking_step() on different tasks
2351     (but only one per task, obviously) and concurrently with the
2352     mutator threads, or during remark, hence it eliminates the need
2353     for two versions of the code. When called during remark, it will
2354     pick up from where the task left off during the concurrent marking
2355     phase. Interestingly, tasks are also claimable during evacuation
2356     pauses, since do_marking_step() ensures that it aborts before
2357     it needs to yield.
2358 
2359     The data structures that it uses to do marking work are the
2360     following:
2361 
2362       (1) Marking Bitmap. If there are gray objects that appear only
2363       on the bitmap (this happens either when dealing with an overflow
2364       or when the initial marking phase has simply marked the roots
2365       and didn't push them on the stack), then tasks claim heap
2366       regions whose bitmap they then scan to find gray objects. A
2367       global finger indicates where the end of the last claimed region
2368       is. A local finger indicates how far into the region a task has
2369       scanned. The two fingers are used to determine how to gray an
2370       object (i.e. whether simply marking it is OK, as it will be
2371       visited by a task in the future, or whether it needs to be also
2372       pushed on a stack).
2373 
2374       (2) Local Queue. The local queue of the task which is accessed
2375       reasonably efficiently by the task. Other tasks can steal from
2376       it when they run out of work. Throughout the marking phase, a
2377       task attempts to keep its local queue short but not totally
2378       empty, so that entries are available for stealing by other
2379       tasks. Only when there is no more work, a task will totally
2380       drain its local queue.
2381 
2382       (3) Global Mark Stack. This handles local queue overflow. During
2383       marking, only sets of entries are moved between it and the local
2384       queues, as access to it requires a mutex and more fine-grained
2385       interaction with it might cause contention. If it
2386       overflows, then the marking phase should restart and iterate
2387       over the bitmap to identify gray objects. Throughout the marking
2388       phase, tasks attempt to keep the global mark stack at a small
2389       length but not totally empty, so that entries are available for
2390       popping by other tasks. Only when there is no more work, tasks
2391       will totally drain the global mark stack.
2392 
2393       (4) SATB Buffer Queue. This is where completed SATB buffers are
2394       made available. Buffers are regularly removed from this queue
2395       and scanned for roots, so that the queue doesn't get too
2396       long. During remark, all completed buffers are processed, as
2397       well as the filled in parts of any uncompleted buffers.
2398 
2399     The do_marking_step() method tries to abort when the time target
2400     has been reached. There are a few other cases when the
2401     do_marking_step() method also aborts:
2402 
2403       (1) When the marking phase has been aborted (after a Full GC).
2404 
2405       (2) When a global overflow (on the global stack) has been
2406       triggered. Before the task aborts, it will actually sync up with
2407       the other tasks to ensure that all the marking data structures
2408       (local queues, stacks, fingers etc.)  are re-initialized so that
2409       when do_marking_step() completes, the marking phase can
2410       immediately restart.
2411 
2412       (3) When enough completed SATB buffers are available. The
2413       do_marking_step() method only tries to drain SATB buffers right
2414       at the beginning. So, if enough buffers are available, the
2415       marking step aborts and the SATB buffers are processed at
2416       the beginning of the next invocation.
2417 
2418       (4) To yield. When we have to yield, we abort and do the yield
2419       right at the end of do_marking_step(). This saves us a lot
2420       of hassle as, by yielding, we might allow a Full GC. If this
2421       happens then objects will be compacted underneath our feet, the
2422       heap might shrink, etc. We avoid checking for this by just
2423       aborting and doing the yield right at the end.
2424 
2425     From the above it follows that the do_marking_step() method should
2426     be called in a loop (or, otherwise, regularly) until it completes.
2427 
2428     If a marking step completes without its has_aborted() flag being
2429     true, it means it has completed the current marking phase (and
2430     also all other marking tasks have done so and have all synced up).
2431 
2432     A method called regular_clock_call() is invoked "regularly" (in
2433     sub-millisecond intervals) throughout marking. It is this clock method that
2434     checks all the abort conditions which were mentioned above and
2435     decides when the task should abort. A work-based scheme is used to
2436     trigger this clock method: when the number of object words the
2437     marking phase has scanned or the number of references the marking
2438     phase has visited reach a given limit. Additional invocations of
2439     the clock method have been planted in a few other strategic places
2440     too. The initial reason for the clock method was to avoid calling
2441     vtime too regularly, as it is quite expensive. So, once it was in
2442     place, it was natural to piggy-back all the other conditions on it
2443     too and not constantly check them throughout the code.
2444 
2445     If do_termination is true then do_marking_step will enter its
2446     termination protocol.
2447 
2448     The value of is_serial must be true when do_marking_step is being
2449     called serially (i.e. by the VMThread) and do_marking_step should
2450     skip any synchronization in the termination and overflow code.
2451     Examples include the serial remark code and the serial reference
2452     processing closures.
2453 
2454     The value of is_serial must be false when do_marking_step is
2455     being called by any of the worker threads in a work gang.
2456     Examples include the concurrent marking code (CMMarkingTask),
2457     the MT remark code, and the MT reference processing closures.
2458 
2459  *****************************************************************************/
2460 
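     // A typical caller drives do_marking_step() in a retry loop of the following
     // shape (see e.g. G1CMRemarkTask::work() and
     // G1CMDrainMarkingStackClosure::do_void() above):
     //
     //   do {
     //     task->do_marking_step(target_ms, do_termination, is_serial);
     //   } while (task->has_aborted() && !cm->has_overflown());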
2461 void G1CMTask::do_marking_step(double time_target_ms,
2462                                bool do_termination,
2463                                bool is_serial) {
2464   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2465 
2466   _start_time_ms = os::elapsedVTime() * 1000.0;
2467 
2468   // If do_stealing is true then do_marking_step will attempt to
2469   // steal work from the other G1CMTasks. It only makes sense to
2470   // enable stealing when the termination protocol is enabled
2471   // and do_marking_step() is not being called serially.
2472   bool do_stealing = do_termination && !is_serial;
2473 
2474   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2475   _time_target_ms = time_target_ms - diff_prediction_ms;
2476 
2477   // set up the variables that are used in the work-based scheme to
2478   // call the regular clock method
2479   _words_scanned = 0;
2480   _refs_reached  = 0;
2481   recalculate_limits();
2482 
2483   // clear all flags
2484   clear_has_aborted();
2485   _has_timed_out = false;
2486   _draining_satb_buffers = false;
2487 
2488   ++_calls;
2489 
2490   // Set up the bitmap and oop closures. Anything that uses them is
2491   // eventually called from this method, so it is OK to allocate these
2492   // statically.
2493   G1CMBitMapClosure bitmap_closure(this, _cm);
2494   G1CMOopClosure cm_oop_closure(_g1h, this);
2495   set_cm_oop_closure(&cm_oop_closure);
2496 
2497   if (_cm->has_overflown()) {
2498     // This can happen if the mark stack overflows during a GC pause
2499     // and this task, after a yield point, restarts. We have to abort
2500     // as we need to get into the overflow protocol which happens
2501     // right at the end of this task.
2502     set_has_aborted();
2503   }
2504 
2505   // First drain any available SATB buffers. After this, we will not
2506   // look at SATB buffers before the next invocation of this method.
2507   // If enough completed SATB buffers are queued up, the regular clock
2508   // will abort this task so that it restarts.
2509   drain_satb_buffers();
2510   // ...then partially drain the local queue and the global stack
2511   drain_local_queue(true);
2512   drain_global_stack(true);
2513 
2514   do {
2515     if (!has_aborted() && _curr_region != NULL) {
2516       // This means that we're already holding on to a region.
2517       assert(_finger != NULL, "if region is not NULL, then the finger "
2518              "should not be NULL either");
2519 
2520       // We might have restarted this task after an evacuation pause
2521       // which might have evacuated the region we're holding on to
2522       // underneath our feet. Let's read its limit again to make sure
2523       // that we do not iterate over a region of the heap that
2524       // contains garbage (update_region_limit() will also move
2525       // _finger to the start of the region if it is found empty).
2526       update_region_limit();
2527       // We will start from _finger not from the start of the region,
2528       // as we might be restarting this task after aborting half-way
2529       // through scanning this region. In this case, _finger points to
2530       // the address where we last found a marked object. If this is a
2531       // fresh region, _finger points to start().
2532       MemRegion mr = MemRegion(_finger, _region_limit);
2533 
2534       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2535              "humongous regions should go around loop once only");
2536 
2537       // Some special cases:
2538       // If the memory region is empty, we can just give up the region.
2539       // If the current region is humongous then we only need to check
2540       // the bitmap for the bit associated with the start of the object,
2541       // scan the object if it's live, and give up the region.
2542       // Otherwise, let's iterate over the bitmap of the part of the region
2543       // that is left.
2544       // If the iteration is successful, give up the region.
2545       if (mr.is_empty()) {
2546         giveup_current_region();
2547         regular_clock_call();
2548       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2549         if (_next_mark_bitmap->is_marked(mr.start())) {
2550           // The object is marked - apply the closure
2551           bitmap_closure.do_addr(mr.start());
2552         }
2553         // Even if this task aborted while scanning the humongous object
2554         // we can (and should) give up the current region.
2555         giveup_current_region();
2556         regular_clock_call();
2557       } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
2558         giveup_current_region();
2559         regular_clock_call();
2560       } else {
2561         assert(has_aborted(), "currently the only way to do so");
2562         // The only way to abort the bitmap iteration is to return
2563         // false from the do_addr() method. However, inside the
2564         // do_addr() method we move the _finger to point to the
2565         // object currently being looked at. So, if we bail out, we
2566         // have definitely set _finger to something non-null.
2567         assert(_finger != NULL, "invariant");
2568 
2569         // Region iteration was actually aborted. So now _finger
2570         // points to the address of the object we last scanned. If we
2571         // leave it there, when we restart this task, we will rescan
2572         // the object. It is easy to avoid this. We move the finger by
2573         // enough to point to the next possible object header.
2574         assert(_finger < _region_limit, "invariant");
2575         HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2576         // Check if bitmap iteration was aborted while scanning the last object
2577         if (new_finger >= _region_limit) {
2578           giveup_current_region();
2579         } else {
2580           move_finger_to(new_finger);
2581         }
2582       }
2583     }
2584     // At this point we have either completed iterating over the
2585     // region we were holding on to, or we have aborted.
2586 
2587     // We then partially drain the local queue and the global stack.
2588     // (Do we really need this?)
2589     drain_local_queue(true);
2590     drain_global_stack(true);
2591 
2592     // Read the note on the claim_region() method on why it might
2593     // return NULL with potentially more regions available for
2594     // claiming and why we have to check out_of_regions() to determine
2595     // whether we're done or not.
2596     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2597       // We are going to try to claim a new region. We should have
2598       // given up on the previous one.
2599       // Separated the asserts so that we know which one fires.
2600       assert(_curr_region  == NULL, "invariant");
2601       assert(_finger       == NULL, "invariant");
2602       assert(_region_limit == NULL, "invariant");
2603       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2604       if (claimed_region != NULL) {
2605         // Yes, we managed to claim one
2606         setup_for_region(claimed_region);
2607         assert(_curr_region == claimed_region, "invariant");
2608       }
2609       // It is important to call the regular clock here. It might take
2610       // a while to claim a region if, for example, we hit a large
2611       // block of empty regions. So we need to call the regular clock
2612       // method once round the loop to make sure it's called
2613       // frequently enough.
2614       regular_clock_call();
2615     }
2616 
2617     if (!has_aborted() && _curr_region == NULL) {
2618       assert(_cm->out_of_regions(),
2619              "at this point we should be out of regions");
2620     }
2621   } while ( _curr_region != NULL && !has_aborted());
2622 
2623   if (!has_aborted()) {
2624     // We cannot check whether the global stack is empty, since other
2625     // tasks might be pushing objects to it concurrently.
2626     assert(_cm->out_of_regions(),
2627            "at this point we should be out of regions");
2628     // Try to reduce the number of available SATB buffers so that
2629     // remark has less work to do.
2630     drain_satb_buffers();
2631   }
2632 
2633   // Since we've done everything else, we can now totally drain the
2634   // local queue and global stack.
2635   drain_local_queue(false);
2636   drain_global_stack(false);
2637 
2638   // Attempt at work stealing from other task's queues.
2639   if (do_stealing && !has_aborted()) {
2640     // We have not aborted. This means that we have finished all that
2641     // we could. Let's try to do some stealing...
2642 
2643     // We cannot check whether the global stack is empty, since other
2644     // tasks might be pushing objects to it concurrently.
2645     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2646            "only way to reach here");
2647     while (!has_aborted()) {
2648       G1TaskQueueEntry entry;
2649       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2650         scan_task_entry(entry);
2651 
2652         // And since we're towards the end, let's totally drain the
2653         // local queue and global stack.
2654         drain_local_queue(false);
2655         drain_global_stack(false);
2656       } else {
2657         break;
2658       }
2659     }
2660   }
2661 
2662   // We still haven't aborted. Now, let's try to get into the
2663   // termination protocol.
2664   if (do_termination && !has_aborted()) {
2665     // We cannot check whether the global stack is empty, since other
2666     // tasks might be concurrently pushing objects on it.
2667     // Separated the asserts so that we know which one fires.
2668     assert(_cm->out_of_regions(), "only way to reach here");
2669     assert(_task_queue->size() == 0, "only way to reach here");
2670     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2671 
2672     // The G1CMTask class also extends the TerminatorTerminator class,
2673     // hence its should_exit_termination() method will also decide
2674     // whether to exit the termination protocol or not.
2675     bool finished = (is_serial ||
2676                      _cm->terminator()->offer_termination(this));
2677     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2678     _termination_time_ms +=
2679       termination_end_time_ms - _termination_start_time_ms;
2680 
2681     if (finished) {
2682       // We're all done.
2683 
2684       // We can now guarantee that the global stack is empty, since
2685       // all other tasks have finished. We separated the guarantees so
2686       // that, if a condition is false, we can immediately find out
2687       // which one.
2688       guarantee(_cm->out_of_regions(), "only way to reach here");
2689       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2690       guarantee(_task_queue->size() == 0, "only way to reach here");
2691       guarantee(!_cm->has_overflown(), "only way to reach here");
2692     } else {
2693       // Apparently there's more work to do. Let's abort this task. The
2694       // caller will restart it and we can hopefully find more things to do.
2695       set_has_aborted();
2696     }
2697   }
2698 
2699   // Mainly for debugging purposes to make sure that a pointer to the
2700   // closure which was statically allocated in this frame doesn't
2701   // escape it by accident.
2702   set_cm_oop_closure(NULL);
2703   double end_time_ms = os::elapsedVTime() * 1000.0;
2704   double elapsed_time_ms = end_time_ms - _start_time_ms;
2705   // Update the step history.
2706   _step_times_ms.add(elapsed_time_ms);
2707 
2708   if (has_aborted()) {
2709     // The task was aborted for some reason.
2710     if (_has_timed_out) {
2711       double diff_ms = elapsed_time_ms - _time_target_ms;
2712       // Keep statistics of how well we did with respect to hitting
2713       // our target only if we actually timed out (if we aborted for
2714       // other reasons, then the results might get skewed).
2715       _marking_step_diffs_ms.add(diff_ms);
2716     }
2717 
2718     if (_cm->has_overflown()) {
2719       // This is the interesting one. We aborted because a global
2720       // overflow was raised. This means we have to restart the
2721       // marking phase and start iterating over regions. However, in
2722       // order to do this we have to make sure that all tasks stop
2723       // what they are doing and re-initialize in a safe manner. We will
2724       // achieve this with two barrier sync points; see the sketch after this method.
2725 
2726       if (!is_serial) {
2727         // We only need to enter the sync barrier if we are being called
2728         // from a parallel context.
2729         _cm->enter_first_sync_barrier(_worker_id);
2730 
2731         // When we exit this sync barrier we know that all tasks have
2732         // stopped doing marking work. So, it's now safe to
2733         // re-initialize our data structures.
2734       }
2735 
2736       clear_region_fields();
2737       flush_mark_stats_cache();
2738 
2739       if (!is_serial) {
2740         // If we're executing the concurrent phase of marking, reset the marking
2741         // state; otherwise the marking state is reset after reference processing,
2742         // during the remark pause.
2743         // If we reset here as a result of an overflow during the remark we will
2744         // see assertion failures from any subsequent set_concurrency_and_phase()
2745         // calls.
2746         if (_cm->concurrent() && _worker_id == 0) {
2747           // Worker 0 is responsible for clearing the global data structures because
2748           // of an overflow. During STW we should not clear the overflow flag (in
2749           // G1ConcurrentMark::reset_marking_for_restart()) since we rely on it being true
2750           // when we exit this method to abort the pause and restart concurrent marking.
2751           _cm->reset_marking_for_restart();
2752 
2753           log_info(gc, marking)("Concurrent Mark reset for overflow");
2754         }
2755 
2756         // ...and enter the second barrier.
2757         _cm->enter_second_sync_barrier(_worker_id);
2758       }
2759       // At this point, if we're in the concurrent phase of
2760       // marking, everything has been re-initialized and we're
2761       // ready to restart.
2762     }
2763   }
2764 }
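
     // Purely illustrative sketch of how the marking drivers re-invoke
     // do_marking_step() while a task keeps aborting (for example after the
     // overflow protocol above has reset the marking state for a restart).
     // The variable names and the target time are made up for illustration:
     //
     //   do {
     //     task->do_marking_step(target_ms,
     //                           true  /* do_termination */,
     //                           false /* is_serial */);
     //   } while (!cm->has_aborted() && task->has_aborted());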
2765 
2766 G1CMTask::G1CMTask(uint worker_id,
2767                    G1ConcurrentMark* cm,
2768                    G1CMTaskQueue* task_queue,
2769                    G1RegionMarkStats* mark_stats,
2770                    uint max_regions) :
2771   _objArray_processor(this),
2772   _worker_id(worker_id),
2773   _g1h(G1CollectedHeap::heap()),
2774   _cm(cm),
2775   _next_mark_bitmap(NULL),
2776   _task_queue(task_queue),
2777   _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize),
2778   _calls(0),
2779   _time_target_ms(0.0),
2780   _start_time_ms(0.0),
2781   _cm_oop_closure(NULL),
2782   _curr_region(NULL),
2783   _finger(NULL),
2784   _region_limit(NULL),
2785   _words_scanned(0),
2786   _words_scanned_limit(0),
2787   _real_words_scanned_limit(0),
2788   _refs_reached(0),
2789   _refs_reached_limit(0),
2790   _real_refs_reached_limit(0),
2791   _hash_seed(17),
2792   _has_aborted(false),
2793   _has_timed_out(false),
2794   _draining_satb_buffers(false),
2795   _step_times_ms(),
2796   _elapsed_time_ms(0.0),
2797   _termination_time_ms(0.0),
2798   _termination_start_time_ms(0.0),
2799   _marking_step_diffs_ms()
2800 {
2801   guarantee(task_queue != NULL, "invariant");
2802 
2803   _marking_step_diffs_ms.add(0.5);
2804 }
2805 
2806 // These are formatting macros that are used below to ensure
2807 // consistent formatting. The *_H_* versions are used to format the
2808 // header for a particular value and should be kept consistent with
2809 // their corresponding value macro. Also note that most of the macros
2810 // add the necessary white space (as a prefix), which makes them a bit
2811 // easier to compose; an illustrative example follows the definitions.
2812 
2813 // All the output lines are prefixed with this string so that they
2814 // can easily be identified in a large log file.
2815 #define G1PPRL_LINE_PREFIX            "###"
2816 
2817 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2818 #ifdef _LP64
2819 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2820 #else // _LP64
2821 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2822 #endif // _LP64
2823 
2824 // For per-region info
2825 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2826 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2827 #define G1PPRL_STATE_FORMAT           "   %-5s"
2828 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2829 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
2830 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2831 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
2832 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
2833 
2834 // For summary info
2835 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2836 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
2837 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2838 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
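
     // Purely illustrative example of how the macros above compose (shown for
     // _LP64; the exact expansions of PTR_FORMAT and SIZE_FORMAT_W are
     // platform dependent). The per-region line in do_heap_region() below is
     // built by string-concatenating the value macros, roughly
     //
     //   G1PPRL_LINE_PREFIX G1PPRL_TYPE_FORMAT G1PPRL_ADDR_BASE_FORMAT
     //   G1PPRL_BYTE_FORMAT ... G1PPRL_STATE_FORMAT G1PPRL_BYTE_FORMAT
     //
     // which yields log lines of the shape (values made up for illustration):
     //
     //   ###   OLD  0x00000000f0000000-0x00000000f0200000    2097152 ...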
2839 
2840 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2841   _total_used_bytes(0), _total_capacity_bytes(0),
2842   _total_prev_live_bytes(0), _total_next_live_bytes(0),
2843   _total_remset_bytes(0), _total_strong_code_roots_bytes(0)
2844 {
2845   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2846   MemRegion g1_reserved = g1h->g1_reserved();
2847   double now = os::elapsedTime();
2848 
2849   // Print the header of the output.
2850   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
2851   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
2852                           G1PPRL_SUM_ADDR_FORMAT("reserved")
2853                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
2854                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
2855                           HeapRegion::GrainBytes);
2856   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2857   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2858                           G1PPRL_TYPE_H_FORMAT
2859                           G1PPRL_ADDR_BASE_H_FORMAT
2860                           G1PPRL_BYTE_H_FORMAT
2861                           G1PPRL_BYTE_H_FORMAT
2862                           G1PPRL_BYTE_H_FORMAT
2863                           G1PPRL_DOUBLE_H_FORMAT
2864                           G1PPRL_BYTE_H_FORMAT
2865                           G1PPRL_STATE_H_FORMAT
2866                           G1PPRL_BYTE_H_FORMAT,
2867                           "type", "address-range",
2868                           "used", "prev-live", "next-live", "gc-eff",
2869                           "remset", "state", "code-roots");
2870   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2871                           G1PPRL_TYPE_H_FORMAT
2872                           G1PPRL_ADDR_BASE_H_FORMAT
2873                           G1PPRL_BYTE_H_FORMAT
2874                           G1PPRL_BYTE_H_FORMAT
2875                           G1PPRL_BYTE_H_FORMAT
2876                           G1PPRL_DOUBLE_H_FORMAT
2877                           G1PPRL_BYTE_H_FORMAT
2878                           G1PPRL_STATE_H_FORMAT
2879                           G1PPRL_BYTE_H_FORMAT,
2880                           "", "",
2881                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
2882                           "(bytes)", "", "(bytes)");
2883 }
2884 
2885 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) {
2886   const char* type       = r->get_type_str();
2887   HeapWord* bottom       = r->bottom();
2888   HeapWord* end          = r->end();
2889   size_t capacity_bytes  = r->capacity();
2890   size_t used_bytes      = r->used();
2891   size_t prev_live_bytes = r->live_bytes();
2892   size_t next_live_bytes = r->next_live_bytes();
2893   double gc_eff          = r->gc_efficiency();
2894   size_t remset_bytes    = r->rem_set()->mem_size();
2895   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
2896   const char* remset_type = r->rem_set()->get_short_state_str();
2897 
2898   _total_used_bytes      += used_bytes;
2899   _total_capacity_bytes  += capacity_bytes;
2900   _total_prev_live_bytes += prev_live_bytes;
2901   _total_next_live_bytes += next_live_bytes;
2902   _total_remset_bytes    += remset_bytes;
2903   _total_strong_code_roots_bytes += strong_code_roots_bytes;
2904 
2905   // Print a line for this particular region.
2906   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2907                           G1PPRL_TYPE_FORMAT
2908                           G1PPRL_ADDR_BASE_FORMAT
2909                           G1PPRL_BYTE_FORMAT
2910                           G1PPRL_BYTE_FORMAT
2911                           G1PPRL_BYTE_FORMAT
2912                           G1PPRL_DOUBLE_FORMAT
2913                           G1PPRL_BYTE_FORMAT
2914                           G1PPRL_STATE_FORMAT
2915                           G1PPRL_BYTE_FORMAT,
2916                           type, p2i(bottom), p2i(end),
2917                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
2918                           remset_bytes, remset_type, strong_code_roots_bytes);
2919 
2920   return false;
2921 }
2922 
2923 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
2924   // Add static memory usage to the remembered set sizes.
2925   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
2926   // Print the footer of the output.
2927   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2928   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2929                          " SUMMARY"
2930                          G1PPRL_SUM_MB_FORMAT("capacity")
2931                          G1PPRL_SUM_MB_PERC_FORMAT("used")
2932                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
2933                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
2934                          G1PPRL_SUM_MB_FORMAT("remset")
2935                          G1PPRL_SUM_MB_FORMAT("code-roots"),
2936                          bytes_to_mb(_total_capacity_bytes),
2937                          bytes_to_mb(_total_used_bytes),
2938                          percent_of(_total_used_bytes, _total_capacity_bytes),
2939                          bytes_to_mb(_total_prev_live_bytes),
2940                          percent_of(_total_prev_live_bytes, _total_capacity_bytes),
2941                          bytes_to_mb(_total_next_live_bytes),
2942                          percent_of(_total_next_live_bytes, _total_capacity_bytes),
2943                          bytes_to_mb(_total_remset_bytes),
2944                          bytes_to_mb(_total_strong_code_roots_bytes));
2945 }