1 /*
   2  * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorState.hpp"
  32 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  33 #include "gc/g1/g1OopClosures.inline.hpp"
  34 #include "gc/g1/g1Policy.hpp"
  35 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  36 #include "gc/g1/g1StringDedup.hpp"
  37 #include "gc/g1/heapRegion.inline.hpp"
  38 #include "gc/g1/heapRegionRemSet.hpp"
  39 #include "gc/g1/heapRegionSet.inline.hpp"
  40 #include "gc/shared/adaptiveSizePolicy.hpp"
  41 #include "gc/shared/gcId.hpp"
  42 #include "gc/shared/gcTimer.hpp"
  43 #include "gc/shared/gcTrace.hpp"
  44 #include "gc/shared/gcTraceTime.inline.hpp"
  45 #include "gc/shared/genOopClosures.inline.hpp"
  46 #include "gc/shared/referencePolicy.hpp"
  47 #include "gc/shared/strongRootsScope.hpp"
  48 #include "gc/shared/suspendibleThreadSet.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "gc/shared/weakProcessor.hpp"
  52 #include "include/jvm.h"
  53 #include "logging/log.hpp"
  54 #include "memory/allocation.hpp"
  55 #include "memory/resourceArea.hpp"
  56 #include "oops/access.inline.hpp"
  57 #include "oops/oop.inline.hpp"
  58 #include "runtime/atomic.hpp"
  59 #include "runtime/handles.inline.hpp"
  60 #include "runtime/java.hpp"
  61 #include "runtime/prefetch.inline.hpp"
  62 #include "services/memTracker.hpp"
  63 #include "utilities/align.hpp"
  64 #include "utilities/growableArray.hpp"
  65 
  66 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  67   assert(addr < _cm->finger(), "invariant");
  68   assert(addr >= _task->finger(), "invariant");
  69 
  70   // We move that task's local finger along.
  71   _task->move_finger_to(addr);
  72 
  73   _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  74   // we only partially drain the local queue and global stack
  75   _task->drain_local_queue(true);
  76   _task->drain_global_stack(true);
  77 
  78   // if the has_aborted flag has been raised, we need to bail out of
  79   // the iteration
  80   return !_task->has_aborted();
  81 }
  82 
  83 G1CMMarkStack::G1CMMarkStack() :
  84   _max_chunk_capacity(0),
  85   _base(NULL),
  86   _chunk_capacity(0) {
  87   set_empty();
  88 }
  89 
  90 bool G1CMMarkStack::resize(size_t new_capacity) {
  91   assert(is_empty(), "Only resize when stack is empty.");
  92   assert(new_capacity <= _max_chunk_capacity,
  93          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
  94 
  95   TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);
  96 
  97   if (new_base == NULL) {
  98     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
  99     return false;
 100   }
 101   // Release old mapping.
 102   if (_base != NULL) {
 103     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 104   }
 105 
 106   _base = new_base;
 107   _chunk_capacity = new_capacity;
 108   set_empty();
 109 
 110   return true;
 111 }
 112 
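     // Alignment, in number of G1TaskQueueEntry elements, for mark stack capacities:
     // the smallest entry count whose backing memory is both a whole number of chunks
     // and a whole number of VM allocation granules.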
 113 size_t G1CMMarkStack::capacity_alignment() {
 114   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 115 }
 116 
 117 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 118   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 119 
 120   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 121 
 122   _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 123   size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 124 
 125   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 126             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 127             _max_chunk_capacity,
 128             initial_chunk_capacity);
 129 
 130   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 131                 initial_chunk_capacity, _max_chunk_capacity);
 132 
 133   return resize(initial_chunk_capacity);
 134 }
 135 
 136 void G1CMMarkStack::expand() {
 137   if (_chunk_capacity == _max_chunk_capacity) {
 138     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 139     return;
 140   }
 141   size_t old_capacity = _chunk_capacity;
 142   // Double capacity if possible
 143   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 144 
 145   if (resize(new_capacity)) {
 146     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 147                   old_capacity, new_capacity);
 148   } else {
 149     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 150                     old_capacity, new_capacity);
 151   }
 152 }
 153 
 154 G1CMMarkStack::~G1CMMarkStack() {
 155   if (_base != NULL) {
 156     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 157   }
 158 }
 159 
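     // Prepend elem to *list. There is no synchronization here; callers serialize
     // access with MarkStackChunkList_lock / MarkStackFreeList_lock respectively.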
 160 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 161   elem->next = *list;
 162   *list = elem;
 163 }
 164 
 165 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 166   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 167   add_chunk_to_list(&_chunk_list, elem);
 168   _chunks_in_chunk_list++;
 169 }
 170 
 171 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 172   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 173   add_chunk_to_list(&_free_list, elem);
 174 }
 175 
 176 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 177   TaskQueueEntryChunk* result = *list;
 178   if (result != NULL) {
 179     *list = (*list)->next;
 180   }
 181   return result;
 182 }
 183 
 184 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 185   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 186   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 187   if (result != NULL) {
 188     _chunks_in_chunk_list--;
 189   }
 190   return result;
 191 }
 192 
 193 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 194   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 195   return remove_chunk_from_list(&_free_list);
 196 }
 197 
 198 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
 199   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 200   // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 201   // wraparound of _hwm.
 202   if (_hwm >= _chunk_capacity) {
 203     return NULL;
 204   }
 205 
 206   size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
 207   if (cur_idx >= _chunk_capacity) {
 208     return NULL;
 209   }
 210 
 211   TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
 212   result->next = NULL;
 213   return result;
 214 }
 215 
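     // Copy the EntriesPerChunk entries in ptr_arr into a chunk (preferring a recycled
     // chunk from the free list) and publish it on the chunk list. Returns false if no
     // chunk could be obtained, i.e. the mark stack has overflowed.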
 216 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 217   // Get a new chunk.
 218   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 219 
 220   if (new_chunk == NULL) {
 221     // Did not get a chunk from the free list. Allocate from backing memory.
 222     new_chunk = allocate_new_chunk();
 223 
 224     if (new_chunk == NULL) {
 225       return false;
 226     }
 227   }
 228 
 229   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 230 
 231   add_chunk_to_chunk_list(new_chunk);
 232 
 233   return true;
 234 }
 235 
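     // Copy the contents of one chunk from the chunk list into ptr_arr and recycle the
     // chunk onto the free list. Returns false if the chunk list is empty.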
 236 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 237   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 238 
 239   if (cur == NULL) {
 240     return false;
 241   }
 242 
 243   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 244 
 245   add_chunk_to_free_list(cur);
 246   return true;
 247 }
 248 
 249 void G1CMMarkStack::set_empty() {
 250   _chunks_in_chunk_list = 0;
 251   _hwm = 0;
 252   _chunk_list = NULL;
 253   _free_list = NULL;
 254 }
 255 
 256 G1CMRootRegions::G1CMRootRegions() :
 257   _survivors(NULL), _cm(NULL), _scan_in_progress(false),
 258   _should_abort(false), _claimed_survivor_index(0) { }
 259 
 260 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 261   _survivors = survivors;
 262   _cm = cm;
 263 }
 264 
 265 void G1CMRootRegions::prepare_for_scan() {
 266   assert(!scan_in_progress(), "pre-condition");
 267 
 268   // Currently, only survivors can be root regions.
 269   _claimed_survivor_index = 0;
 270   _scan_in_progress = _survivors->regions()->is_nonempty();
 271   _should_abort = false;
 272 }
 273 
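     // Atomically claim the next unscanned survivor (root) region, returning NULL once
     // all root regions have been claimed or the scan has been aborted.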
 274 HeapRegion* G1CMRootRegions::claim_next() {
 275   if (_should_abort) {
 276     // If someone has set the should_abort flag, we return NULL to
 277     // force the caller to bail out of their loop.
 278     return NULL;
 279   }
 280 
 281   // Currently, only survivors can be root regions.
 282   const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
 283 
 284   int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
 285   if (claimed_index < survivor_regions->length()) {
 286     return survivor_regions->at(claimed_index);
 287   }
 288   return NULL;
 289 }
 290 
 291 uint G1CMRootRegions::num_root_regions() const {
 292   return (uint)_survivors->regions()->length();
 293 }
 294 
 295 void G1CMRootRegions::notify_scan_done() {
 296   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 297   _scan_in_progress = false;
 298   RootRegionScan_lock->notify_all();
 299 }
 300 
 301 void G1CMRootRegions::cancel_scan() {
 302   notify_scan_done();
 303 }
 304 
 305 void G1CMRootRegions::scan_finished() {
 306   assert(scan_in_progress(), "pre-condition");
 307 
 308   // Currently, only survivors can be root regions.
 309   if (!_should_abort) {
 310     assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
 311     assert((uint)_claimed_survivor_index >= _survivors->length(),
 312            "we should have claimed all survivors, claimed index = %u, length = %u",
 313            (uint)_claimed_survivor_index, _survivors->length());
 314   }
 315 
 316   notify_scan_done();
 317 }
 318 
 319 bool G1CMRootRegions::wait_until_scan_finished() {
 320   if (!scan_in_progress()) {
 321     return false;
 322   }
 323 
 324   {
 325     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 326     while (scan_in_progress()) {
 327       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 328     }
 329   }
 330   return true;
 331 }
 332 
 333 // Returns the maximum number of workers to be used in a concurrent
 334 // phase based on the number of GC workers being used in a STW
 335 // phase.
 336 static uint scale_concurrent_worker_threads(uint num_gc_workers) {
 337   return MAX2((num_gc_workers + 2) / 4, 1U);
 338 }
 339 
 340 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 341                                    G1RegionToSpaceMapper* prev_bitmap_storage,
 342                                    G1RegionToSpaceMapper* next_bitmap_storage) :
 343   // _cm_thread set inside the constructor
 344   _g1h(g1h),
 345   _completed_initialization(false),
 346 
 347   _mark_bitmap_1(),
 348   _mark_bitmap_2(),
 349   _prev_mark_bitmap(&_mark_bitmap_1),
 350   _next_mark_bitmap(&_mark_bitmap_2),
 351 
 352   _heap(_g1h->reserved_region()),
 353 
 354   _root_regions(),
 355 
 356   _global_mark_stack(),
 357 
 358   // _finger set in reset_marking_for_restart()
 359 
 360   _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
 361   _max_num_tasks(ParallelGCThreads),
 362   // _num_active_tasks set in reset_at_marking_complete()
 363   // _tasks set inside the constructor
 364 
 365   _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
 366   _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),
 367 
 368   _first_overflow_barrier_sync(),
 369   _second_overflow_barrier_sync(),
 370 
 371   _has_overflown(false),
 372   _concurrent(false),
 373   _has_aborted(false),
 374   _restart_for_overflow(false),
 375   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 376   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 377 
 378   // _verbose_level set below
 379 
 380   _init_times(),
 381   _remark_times(),
 382   _remark_mark_times(),
 383   _remark_weak_ref_times(),
 384   _cleanup_times(),
 385   _total_cleanup_time(0.0),
 386 
 387   _accum_task_vtime(NULL),
 388 
 389   _concurrent_workers(NULL),
 390   _num_concurrent_workers(0),
 391   _max_concurrent_workers(0),
 392 
 393   _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
 394   _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
 395 {
 396   _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 397   _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);
 398 
 399   // Create & start ConcurrentMark thread.
 400   _cm_thread = new ConcurrentMarkThread(this);
 401   if (_cm_thread->osthread() == NULL) {
 402     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 403   }
 404 
 405   assert(CGC_lock != NULL, "CGC_lock must be initialized");
 406 
 407   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 408   satb_qs.set_buffer_size(G1SATBBufferSize);
 409 
 410   _root_regions.init(_g1h->survivor(), this);
 411 
 412   if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
 413     // Calculate the number of concurrent worker threads by scaling
 414     // the number of parallel GC threads.
 415     uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
 416     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 417   }
 418 
 419   assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
 420   if (ConcGCThreads > ParallelGCThreads) {
 421     log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
 422                     ConcGCThreads, ParallelGCThreads);
 423     return;
 424   }
 425 
 426   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 427   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 428 
 429   _num_concurrent_workers = ConcGCThreads;
 430   _max_concurrent_workers = _num_concurrent_workers;
 431 
 432   _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
 433   _concurrent_workers->initialize_workers();
 434 
 435   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 436     size_t mark_stack_size =
 437       MIN2(MarkStackSizeMax,
 438           MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
 439     // Verify that the calculated value for MarkStackSize is in range.
 440     // It would be nice to use the private utility routine from Arguments.
 441     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 442       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 443                       "must be between 1 and " SIZE_FORMAT,
 444                       mark_stack_size, MarkStackSizeMax);
 445       return;
 446     }
 447     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 448   } else {
 449     // Verify MarkStackSize is in range.
 450     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 451       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 452         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 453           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 454                           "must be between 1 and " SIZE_FORMAT,
 455                           MarkStackSize, MarkStackSizeMax);
 456           return;
 457         }
 458       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 459         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 460           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 461                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 462                           MarkStackSize, MarkStackSizeMax);
 463           return;
 464         }
 465       }
 466     }
 467   }
 468 
 469   if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
 470     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 471   }
 472 
 473   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 474   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);
 475 
 476   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 477   _num_active_tasks = _max_num_tasks;
 478 
 479   for (uint i = 0; i < _max_num_tasks; ++i) {
 480     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 481     task_queue->initialize();
 482     _task_queues->register_queue(i, task_queue);
 483 
 484     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());
 485 
 486     _accum_task_vtime[i] = 0.0;
 487   }
 488 
 489   reset_at_marking_complete();
 490   _completed_initialization = true;
 491 }
 492 
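     // Reset the global marking state and all per-task and per-region marking data,
     // preparing for a new marking cycle. Done while the world is stopped, as part of
     // the initial-mark pause (see pre_initial_mark()).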
 493 void G1ConcurrentMark::reset() {
 494   _has_aborted = false;
 495 
 496   reset_marking_for_restart();
 497 
 498   // Reset all tasks, since different phases will use a different number of active
 499   // threads. So, it's easiest to have all of them ready.
 500   for (uint i = 0; i < _max_num_tasks; ++i) {
 501     _tasks[i]->reset(_next_mark_bitmap);
 502   }
 503 
 504   uint max_regions = _g1h->max_regions();
 505   for (uint i = 0; i < max_regions; i++) {
 506     _top_at_rebuild_starts[i] = NULL;
 507     _region_mark_stats[i].clear();
 508   }
 509 }
 510 
 511 void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
 512   for (uint j = 0; j < _max_num_tasks; ++j) {
 513     _tasks[j]->clear_mark_stats_cache(region_idx);
 514   }
 515   _top_at_rebuild_starts[region_idx] = NULL;
 516   _region_mark_stats[region_idx].clear();
 517 }
 518 
 519 void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
 520   uint const region_idx = r->hrm_index();
 521   if (r->is_humongous()) {
 522     assert(r->is_starts_humongous(), "Got humongous continues region here");
 523     uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
 524     for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
 525       clear_statistics_in_region(j);
 526     }
 527   } else {
 528     clear_statistics_in_region(region_idx);
 529   }
 530 }
 531 
 532 static void clear_mark_if_set(G1CMBitMap* bitmap, HeapWord* addr) {
 533   if (bitmap->is_marked(addr)) {
 534     bitmap->clear(addr);
 535   }
 536 }
 537 
 538 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
 539   assert_at_safepoint_on_vm_thread();
 540 
 541   // Need to clear all mark bits of the humongous object.
 542   clear_mark_if_set(_prev_mark_bitmap, r->bottom());
 543   clear_mark_if_set(_next_mark_bitmap, r->bottom());
 544 
 545   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
 546     return;
 547   }
 548 
 549   // Clear any statistics about the region gathered so far.
 550   clear_statistics(r);
 551 }
 552 
 553 void G1ConcurrentMark::reset_marking_for_restart() {
 554   _global_mark_stack.set_empty();
 555 
 556   // Expand the marking stack, if we have to and if we can.
 557   if (has_overflown()) {
 558     _global_mark_stack.expand();
 559 
 560     uint max_regions = _g1h->max_regions();
 561     for (uint i = 0; i < max_regions; i++) {
 562       _region_mark_stats[i].clear_during_overflow();
 563     }
 564   }
 565 
 566   clear_has_overflown();
 567   _finger = _heap.start();
 568 
 569   for (uint i = 0; i < _max_num_tasks; ++i) {
 570     G1CMTaskQueue* queue = _task_queues->queue(i);
 571     queue->set_empty();
 572   }
 573 }
 574 
 575 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 576   assert(active_tasks <= _max_num_tasks, "we should not have more");
 577 
 578   _num_active_tasks = active_tasks;
 579   // Need to update the three data structures below according to the
 580   // number of active threads for this phase.
 581   _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
 582   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 583   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 584 }
 585 
 586 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 587   set_concurrency(active_tasks);
 588 
 589   _concurrent = concurrent;
 590 
 591   if (!concurrent) {
 592     // At this point we should be in a STW phase and should have completed marking.
 593     assert_at_safepoint_on_vm_thread();
 594     assert(out_of_regions(),
 595            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 596            p2i(_finger), p2i(_heap.end()));
 597   }
 598 }
 599 
 600 void G1ConcurrentMark::reset_at_marking_complete() {
 601   // We set the global marking state to some default values when we're
 602   // not doing marking.
 603   reset_marking_for_restart();
 604   _num_active_tasks = 0;
 605 }
 606 
 607 G1ConcurrentMark::~G1ConcurrentMark() {
 608   FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
 609   FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
 610   // The G1ConcurrentMark instance is never freed.
 611   ShouldNotReachHere();
 612 }
 613 
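     // Parallel task clearing the given mark bitmap region by region, in chunks of
     // chunk_size() bytes. When suspendible, workers join the suspendible thread set
     // and yield between chunks, aborting the iteration if marking has been aborted
     // in the meantime.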
 614 class G1ClearBitMapTask : public AbstractGangTask {
 615 public:
 616   static size_t chunk_size() { return M; }
 617 
 618 private:
 619   // Heap region closure used for clearing the given mark bitmap.
 620   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 621   private:
 622     G1CMBitMap* _bitmap;
 623     G1ConcurrentMark* _cm;
 624   public:
 625     G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
 626     }
 627 
 628     virtual bool do_heap_region(HeapRegion* r) {
 629       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 630 
 631       HeapWord* cur = r->bottom();
 632       HeapWord* const end = r->end();
 633 
 634       while (cur < end) {
 635         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 636         _bitmap->clear_range(mr);
 637 
 638         cur += chunk_size_in_words;
 639 
 640         // Abort iteration if after yielding the marking has been aborted.
 641         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 642           return true;
 643         }
 644         // Repeat the asserts from before the start of the closure. We will do them
 645         // as asserts here to minimize their overhead on the product. However, we
 646         // will have them as guarantees at the beginning / end of the bitmap
 647         // clearing to get some checking in the product.
 648         assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
 649         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 650       }
 651       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 652 
 653       return false;
 654     }
 655   };
 656 
 657   G1ClearBitmapHRClosure _cl;
 658   HeapRegionClaimer _hr_claimer;
 659   bool _suspendible; // If the task is suspendible, workers must join the STS.
 660 
 661 public:
 662   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 663     AbstractGangTask("G1 Clear Bitmap"),
 664     _cl(bitmap, suspendible ? cm : NULL),
 665     _hr_claimer(n_workers),
 666     _suspendible(suspendible)
 667   { }
 668 
 669   void work(uint worker_id) {
 670     SuspendibleThreadSetJoiner sts_join(_suspendible);
 671     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 672   }
 673 
 674   bool is_complete() {
 675     return _cl.is_complete();
 676   }
 677 };
 678 
 679 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 680   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 681 
 682   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 683   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 684 
 685   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 686 
 687   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 688 
 689   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 690   workers->run_task(&cl, num_workers);
 691   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 692 }
 693 
 694 void G1ConcurrentMark::cleanup_for_next_mark() {
 695   // Make sure that the concurrent mark thread looks to still be in
 696   // the current cycle.
 697   guarantee(cm_thread()->during_cycle(), "invariant");
 698 
 699   // We are finishing up the current cycle by clearing the next
 700   // marking bitmap and getting it ready for the next cycle. During
 701   // this time no other cycle can start. So, let's make sure that this
 702   // is the case.
 703   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 704 
 705   clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);
 706 
 707   // Repeat the asserts from above.
 708   guarantee(cm_thread()->during_cycle(), "invariant");
 709   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 710 }
 711 
 712 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 713   assert_at_safepoint_on_vm_thread();
 714   clear_bitmap(_prev_mark_bitmap, workers, false);
 715 }
 716 
 717 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 718   G1CMBitMap* _bitmap;
 719  public:
 720   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 721   }
 722 
 723   virtual bool do_heap_region(HeapRegion* r) {
 724     // This closure can be called concurrently with the mutator, so we must make sure
 725     // that the result of the get_next_marked_addr() call is compared to the
 726     // value passed to it as limit to detect any found bits.
 727     // end never changes in G1.
 728     HeapWord* end = r->end();
 729     return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
 730   }
 731 };
 732 
 733 bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
 734   CheckBitmapClearHRClosure cl(_next_mark_bitmap);
 735   _g1h->heap_region_iterate(&cl);
 736   return cl.is_complete();
 737 }
 738 
 739 class NoteStartOfMarkHRClosure : public HeapRegionClosure {
 740 public:
 741   bool do_heap_region(HeapRegion* r) {
 742     r->note_start_of_marking();
 743     return false;
 744   }
 745 };
 746 
 747 void G1ConcurrentMark::pre_initial_mark() {
 748   // Initialize marking structures. This has to be done in a STW phase.
 749   reset();
 750 
 751   // For each region note start of marking.
 752   NoteStartOfMarkHRClosure startcl;
 753   _g1h->heap_region_iterate(&startcl);
 754 }
 755 
 756 
 757 void G1ConcurrentMark::post_initial_mark() {
 758   // Start Concurrent Marking weak-reference discovery.
 759   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 760   // enable ("weak") refs discovery
 761   rp->enable_discovery();
 762   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 763 
 764   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 765   // This is the start of the marking cycle; we expect all
 766   // threads to have SATB queues with active set to false.
 767   satb_mq_set.set_active_all_threads(true, /* new active value */
 768                                      false /* expected_active */);
 769 
 770   _root_regions.prepare_for_scan();
 771 
 772   // update_g1_committed() will be called at the end of an evac pause
 773   // when marking is on. So, it's also called at the end of the
 774   // initial-mark pause to update the heap end, if the heap expands
 775   // during it. No need to call it here.
 776 }
 777 
 778 /*
 779  * Notice that in the next two methods, we actually leave the STS
 780  * during the barrier sync and join it immediately afterwards. If we
 781  * do not do this, the following deadlock can occur: one thread could
 782  * be in the barrier sync code, waiting for the other thread to also
 783  * sync up, whereas another one could be trying to yield, while also
 784  * waiting for the other threads to sync up too.
 785  *
 786  * Note, however, that this code is also used during remark and in
 787  * this case we should not attempt to leave / enter the STS, otherwise
 788  * we'll either hit an assert (debug / fastdebug) or deadlock
 789  * (product). So we should only leave / enter the STS if we are
 790  * operating concurrently.
 791  *
 792  * Because the thread that does the sync barrier has left the STS, it
 793  * is possible for it to be suspended while a Full GC or an evacuation
 794  * pause occurs. This is actually safe, since entering the sync barrier
 795  * is one of the last things do_marking_step() does, and it doesn't
 796  * manipulate any data structures afterwards.
 797  */
 798 
 799 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 800   bool barrier_aborted;
 801   {
 802     SuspendibleThreadSetLeaver sts_leave(concurrent());
 803     barrier_aborted = !_first_overflow_barrier_sync.enter();
 804   }
 805 
 806   // at this point everyone should have synced up and not be doing any
 807   // more work
 808 
 809   if (barrier_aborted) {
 810     // If the barrier aborted we ignore the overflow condition and
 811     // just abort the whole marking phase as quickly as possible.
 812     return;
 813   }
 814 }
 815 
 816 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 817   SuspendibleThreadSetLeaver sts_leave(concurrent());
 818   _second_overflow_barrier_sync.enter();
 819 
 820   // at this point everything should be re-initialized and ready to go
 821 }
 822 
 823 class G1CMConcurrentMarkingTask : public AbstractGangTask {
 824   G1ConcurrentMark*     _cm;
 825 public:
 826   void work(uint worker_id) {
 827     assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
 828     ResourceMark rm;
 829 
 830     double start_vtime = os::elapsedVTime();
 831 
 832     {
 833       SuspendibleThreadSetJoiner sts_join;
 834 
 835       assert(worker_id < _cm->active_tasks(), "invariant");
 836 
 837       G1CMTask* task = _cm->task(worker_id);
 838       task->record_start_time();
 839       if (!_cm->has_aborted()) {
 840         do {
 841           task->do_marking_step(G1ConcMarkStepDurationMillis,
 842                                 true  /* do_termination */,
 843                                 false /* is_serial*/);
 844 
 845           _cm->do_yield_check();
 846         } while (!_cm->has_aborted() && task->has_aborted());
 847       }
 848       task->record_end_time();
 849       guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
 850     }
 851 
 852     double end_vtime = os::elapsedVTime();
 853     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 854   }
 855 
 856   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
 857       AbstractGangTask("Concurrent Mark"), _cm(cm) { }
 858 
 859   ~G1CMConcurrentMarkingTask() { }
 860 };
 861 
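     // Calculate the number of concurrent marking workers to activate: all of them when
     // dynamic GC thread adjustment is disabled (or ConcGCThreads was set explicitly and
     // not forced dynamic), otherwise an adaptive count based on the current number of
     // non-daemon Java threads.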
 862 uint G1ConcurrentMark::calc_active_marking_workers() {
 863   uint result = 0;
 864   if (!UseDynamicNumberOfGCThreads ||
 865       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 866        !ForceDynamicNumberOfGCThreads)) {
 867     result = _max_concurrent_workers;
 868   } else {
 869     result =
 870       AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
 871                                                       1, /* Minimum workers */
 872                                                       _num_concurrent_workers,
 873                                                       Threads::number_of_non_daemon_threads());
 874     // Don't scale the result down by scale_concurrent_worker_threads() because
 875     // that scaling has already gone into "_max_concurrent_workers".
 876   }
 877   assert(result > 0 && result <= _max_concurrent_workers,
 878          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
 879          _max_concurrent_workers, result);
 880   return result;
 881 }
 882 
 883 void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) {
 884   // Currently, only survivors can be root regions.
 885   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 886   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 887 
 888   const uintx interval = PrefetchScanIntervalInBytes;
 889   HeapWord* curr = hr->bottom();
 890   const HeapWord* end = hr->top();
 891   while (curr < end) {
 892     Prefetch::read(curr, interval);
 893     oop obj = oop(curr);
 894     int size = obj->oop_iterate_size(&cl);
 895     assert(size == obj->size(), "sanity");
 896     curr += size;
 897   }
 898 }
 899 
 900 class G1CMRootRegionScanTask : public AbstractGangTask {
 901   G1ConcurrentMark* _cm;
 902 public:
 903   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 904     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
 905 
 906   void work(uint worker_id) {
 907     assert(Thread::current()->is_ConcurrentGC_thread(),
 908            "this should only be done by a conc GC thread");
 909 
 910     G1CMRootRegions* root_regions = _cm->root_regions();
 911     HeapRegion* hr = root_regions->claim_next();
 912     while (hr != NULL) {
 913       _cm->scan_root_region(hr, worker_id);
 914       hr = root_regions->claim_next();
 915     }
 916   }
 917 };
 918 
 919 void G1ConcurrentMark::scan_root_regions() {
 920   // scan_in_progress() will have been set to true only if there was
 921   // at least one root region to scan. So, if it's false, we
 922   // should not attempt to do any further work.
 923   if (root_regions()->scan_in_progress()) {
 924     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 925 
 926     _num_concurrent_workers = MIN2(calc_active_marking_workers(),
 927                                    // We distribute work on a per-region basis, so starting
 928                                    // more threads than that is useless.
 929                                    root_regions()->num_root_regions());
 930     assert(_num_concurrent_workers <= _max_concurrent_workers,
 931            "Maximum number of marking threads exceeded");
 932 
 933     G1CMRootRegionScanTask task(this);
 934     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
 935                         task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
 936     _concurrent_workers->run_task(&task, _num_concurrent_workers);
 937 
 938     // It's possible that has_aborted() is true here without actually
 939     // aborting the survivor scan earlier. This is OK as it's
 940     // mainly used for sanity checking.
 941     root_regions()->scan_finished();
 942   }
 943 }
 944 
 945 void G1ConcurrentMark::concurrent_cycle_start() {
 946   _gc_timer_cm->register_gc_start();
 947 
 948   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
 949 
 950   _g1h->trace_heap_before_gc(_gc_tracer_cm);
 951 }
 952 
 953 void G1ConcurrentMark::concurrent_cycle_end() {
 954   _g1h->collector_state()->set_clearing_next_bitmap(false);
 955 
 956   _g1h->trace_heap_after_gc(_gc_tracer_cm);
 957 
 958   if (has_aborted()) {
 959     log_info(gc, marking)("Concurrent Mark Abort");
 960     _gc_tracer_cm->report_concurrent_mode_failure();
 961   }
 962 
 963   _gc_timer_cm->register_gc_end();
 964 
 965   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
 966 }
 967 
 968 void G1ConcurrentMark::mark_from_roots() {
 969   _restart_for_overflow = false;
 970 
 971   _num_concurrent_workers = calc_active_marking_workers();
 972 
 973   uint active_workers = MAX2(1U, _num_concurrent_workers);
 974 
 975   // Setting active workers is not guaranteed since fewer
 976   // worker threads may currently exist and more may not be
 977   // available.
 978   active_workers = _concurrent_workers->update_active_workers(active_workers);
 979   log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());
 980 
 981   // Parallel task terminator is set in "set_concurrency_and_phase()"
 982   set_concurrency_and_phase(active_workers, true /* concurrent */);
 983 
 984   G1CMConcurrentMarkingTask marking_task(this);
 985   _concurrent_workers->run_task(&marking_task);
 986   print_stats();
 987 }
 988 
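     // Verification run at the start and end of the Remark and Cleanup pauses; the
     // expensive full heap verification only happens when VerifyDuringGC is enabled.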
 989 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
 990   G1HeapVerifier* verifier = _g1h->verifier();
 991 
 992   verifier->verify_region_sets_optional();
 993 
 994   if (VerifyDuringGC) {
 995     GCTraceTime(Debug, gc, phases) trace(caller, _gc_timer_cm);
 996 
 997     size_t const BufLen = 512;
 998     char buffer[BufLen];
 999 
1000     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1001     verifier->verify(type, vo, buffer);
1002   }
1003 
1004   verifier->check_bitmaps(caller);
1005 }
1006 
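     // Closure that, for each region, asks the remembered set tracking policy whether
     // the region's remembered set should be rebuilt, based on the liveness found during
     // marking, and records the top-at-rebuild-start value bounding the rebuild phase.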
1007 class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
1008   G1CollectedHeap* _g1h;
1009   G1ConcurrentMark* _cm;
1010 
1011   uint _num_regions_selected_for_rebuild;  // The number of regions actually selected for rebuild.
1012 
1013   void update_remset_before_rebuild(HeapRegion* hr) {
1014     G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker();
1015 
1016     size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize;
1017     bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
1018     if (selected_for_rebuild) {
1019       _num_regions_selected_for_rebuild++;
1020     }
1021     _cm->update_top_at_rebuild_start(hr);
1022   }
1023 
1024 public:
1025   G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm) :
1026     _g1h(g1h), _cm(cm), _num_regions_selected_for_rebuild(0) { }
1027 
1028   virtual bool do_heap_region(HeapRegion* r) {
1029     update_remset_before_rebuild(r);
1030     return false;
1031   }
1032 
1033   uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
1034 };
1035 
1036 class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
1037   G1CollectedHeap* _g1h;
1038 public:
1039   G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1040 
1041   virtual bool do_heap_region(HeapRegion* r) {
1042     _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r);
1043     return false;
1044   }
1045 };
1046 
1047 void G1ConcurrentMark::remark() {
1048   assert_at_safepoint_on_vm_thread();
1049 
1050   // If a full collection has happened, we should not continue. However we might
1051   // have ended up here as the Remark VM operation has been scheduled already.
1052   if (has_aborted()) {
1053     return;
1054   }
1055 
1056   G1Policy* g1p = _g1h->g1_policy();
1057   g1p->record_concurrent_mark_remark_start();
1058 
1059   double start = os::elapsedTime();
1060 
1061   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");
1062 
1063   {
1064     GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1065     finalize_marking();
1066   }
1067 
1068   double mark_work_end = os::elapsedTime();
1069 
1070   bool const mark_finished = !has_overflown();
1071   if (mark_finished) {
1072     weak_refs_work(false /* clear_all_soft_refs */);
1073 
1074     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1075     // We're done with marking.
1076     // This is the end of the marking cycle; we expect all
1077     // threads to have SATB queues with active set to true.
1078     satb_mq_set.set_active_all_threads(false, /* new active value */
1079                                        true /* expected_active */);
1080 
1081     {
1082       GCTraceTime(Debug, gc, phases) trace("Flush Task Caches");
1083       flush_all_task_caches();
1084     }
1085 
1086     {
1087       GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking Before Rebuild");
1088       G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this);
1089       _g1h->heap_region_iterate(&cl);
1090       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1091                                       _g1h->num_regions(), cl.num_selected_for_rebuild());
1092     }
1093 
1094     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UseNextMarking, "Remark after");
1095 
1096     assert(!restart_for_overflow(), "sanity");
1097     // Completely reset the marking state since marking completed
1098     reset_at_marking_complete();
1099   } else {
1100     // We overflowed.  Restart concurrent marking.
1101     _restart_for_overflow = true;
1102 
1103     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");
1104 
1105     // Clear the marking state because we will be restarting
1106     // marking due to overflowing the global mark stack.
1107     reset_marking_for_restart();
1108   }
1109 
1110   {
1111     GCTraceTime(Debug, gc, phases) trace("Report Object Count");
1112     report_object_count();
1113   }
1114 
1115   // Statistics
1116   double now = os::elapsedTime();
1117   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1118   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1119   _remark_times.add((now - start) * 1000.0);
1120 
1121   g1p->record_concurrent_mark_remark_end();
1122 }
1123 
1124 class G1CleanupTask : public AbstractGangTask {
1125   // Per-region work during the Cleanup pause.
1126   class G1CleanupRegionsClosure : public HeapRegionClosure {
1127     G1CollectedHeap* _g1h;
1128     size_t _freed_bytes;
1129     FreeRegionList* _local_cleanup_list;
1130     uint _old_regions_removed;
1131     uint _humongous_regions_removed;
1132     HRRSCleanupTask* _hrrs_cleanup_task;
1133 
1134   public:
1135     G1CleanupRegionsClosure(G1CollectedHeap* g1,
1136                             FreeRegionList* local_cleanup_list,
1137                             HRRSCleanupTask* hrrs_cleanup_task) :
1138       _g1h(g1),
1139       _freed_bytes(0),
1140       _local_cleanup_list(local_cleanup_list),
1141       _old_regions_removed(0),
1142       _humongous_regions_removed(0),
1143       _hrrs_cleanup_task(hrrs_cleanup_task) { }
1144 
1145     size_t freed_bytes() { return _freed_bytes; }
1146     uint old_regions_removed() const { return _old_regions_removed; }
1147     uint humongous_regions_removed() const { return _humongous_regions_removed; }
1148 
1149     bool do_heap_region(HeapRegion *hr) {
1150       hr->note_end_of_marking();
1151 
1152       if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
1153         _freed_bytes += hr->used();
1154         hr->set_containing_set(NULL);
1155         if (hr->is_humongous()) {
1156           _humongous_regions_removed++;
1157           _g1h->free_humongous_region(hr, _local_cleanup_list);
1158         } else {
1159           _old_regions_removed++;
1160           _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */);
1161         }
1162         hr->clear_cardtable();
1163         _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
1164         log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
1165       } else {
1166         hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1167       }
1168 
1169       return false;
1170     }
1171   };
1172 
1173   G1CollectedHeap* _g1h;
1174   FreeRegionList* _cleanup_list;
1175   HeapRegionClaimer _hrclaimer;
1176 
1177 public:
1178   G1CleanupTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1179     AbstractGangTask("G1 Cleanup"),
1180     _g1h(g1h),
1181     _cleanup_list(cleanup_list),
1182     _hrclaimer(n_workers) {
1183 
1184     HeapRegionRemSet::reset_for_cleanup_tasks();
1185   }
1186 
1187   void work(uint worker_id) {
1188     FreeRegionList local_cleanup_list("Local Cleanup List");
1189     HRRSCleanupTask hrrs_cleanup_task;
1190     G1CleanupRegionsClosure cl(_g1h,
1191                                &local_cleanup_list,
1192                                &hrrs_cleanup_task);
1193     _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
1194     assert(cl.is_complete(), "Shouldn't have aborted!");
1195 
1196     // Now update the old/humongous region sets
1197     _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
1198     {
1199       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1200       _g1h->decrement_summary_bytes(cl.freed_bytes());
1201 
1202       _cleanup_list->add_ordered(&local_cleanup_list);
1203       assert(local_cleanup_list.is_empty(), "post-condition");
1204 
1205       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1206     }
1207   }
1208 };
1209 
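     // Free regions that marking found completely empty and return them to the free
     // list, notifying the region printer for each reclaimed region if it is active.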
1210 void G1ConcurrentMark::reclaim_empty_regions() {
1211   WorkGang* workers = _g1h->workers();
1212   FreeRegionList empty_regions_list("Empty Regions After Mark List");
1213 
1214   G1CleanupTask cl(_g1h, &empty_regions_list, workers->active_workers());
1215   workers->run_task(&cl);
1216 
1217   if (!empty_regions_list.is_empty()) {
1218     log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
1219     // Now print the empty regions list.
1220     G1HRPrinter* hrp = _g1h->hr_printer();
1221     if (hrp->is_active()) {
1222       FreeRegionListIterator iter(&empty_regions_list);
1223       while (iter.more_available()) {
1224         HeapRegion* hr = iter.get_next();
1225         hrp->cleanup(hr);
1226       }
1227     }
1228     // And actually make them available.
1229     _g1h->prepend_to_freelist(&empty_regions_list);
1230   }
1231 }
1232 
1233 void G1ConcurrentMark::cleanup() {
1234   assert_at_safepoint_on_vm_thread();
1235 
1236   // If a full collection has happened, we shouldn't do this.
1237   if (has_aborted()) {
1238     return;
1239   }
1240 
1241   G1Policy* g1p = _g1h->g1_policy();
1242   g1p->record_concurrent_mark_cleanup_start();
1243 
1244   double start = os::elapsedTime();
1245 
1246   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UseNextMarking, "Cleanup before");
1247 
1248   {
1249     GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking After Rebuild");
1250     G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
1251     _g1h->heap_region_iterate(&cl);
1252   }
1253 
1254   if (log_is_enabled(Trace, gc, liveness)) {
1255     G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
1256     _g1h->heap_region_iterate(&cl);
1257   }
1258 
1259   // Install newly created mark bitmap as "prev".
1260   swap_mark_bitmaps();
1261   {
1262     GCTraceTime(Debug, gc, phases) trace("Reclaim Empty Regions");
1263     reclaim_empty_regions();
1264   }
1265 
1266   // Cleanup will have freed any regions completely full of garbage.
1267   // Update the soft reference policy with the new heap occupancy.
1268   Universe::update_heap_info_at_gc();
1269 
1270   // Clean out dead classes and update Metaspace sizes.
1271   if (ClassUnloadingWithConcurrentMark) {
1272     GCTraceTime(Debug, gc, phases) trace("Purge Metaspace");
1273     ClassLoaderDataGraph::purge();
1274   }
1275   MetaspaceGC::compute_new_size();
1276 
1277   // We reclaimed old regions so we should calculate the sizes to make
1278   // sure we update the old gen/space data.
1279   _g1h->g1mm()->update_sizes();
1280 
1281   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");
1282 
1283   // We need to make this be a "collection" so any collection pause that
1284   // races with it goes around and waits for Cleanup to finish.
1285   _g1h->increment_total_collections();
1286 
1287   // Local statistics
1288   double recent_cleanup_time = (os::elapsedTime() - start);
1289   _total_cleanup_time += recent_cleanup_time;
1290   _cleanup_times.add(recent_cleanup_time);
1291 
1292   {
1293     GCTraceTime(Debug, gc, phases) trace("Finalize Concurrent Mark Cleanup");
1294     _g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1295   }
1296 }
1297 
1298 // Supporting Object and Oop closures for reference discovery
1299 // and processing during marking
1300 
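     // An object is considered live if it lies outside the reserved G1 heap, or if it
     // has not been determined dead by the concurrent marking (is_obj_ill()).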
1301 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1302   HeapWord* addr = (HeapWord*)obj;
1303   return addr != NULL &&
1304          (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj));
1305 }
1306 
1307 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1308 // Uses the G1CMTask associated with a worker thread (for serial reference
1309 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1310 // trace referent objects.
1311 //
1312 // Using the G1CMTask and embedded local queues avoids having the worker
1313 // threads operating on the global mark stack. This reduces the risk
1314 // of overflowing the stack - which we would rather avoid at this late
1315 // stage. Also, using the tasks' local queues removes the potential for
1316 // the workers to interfere with each other, which could occur if they
1317 // operated on the global stack.
1318 
1319 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1320   G1ConcurrentMark* _cm;
1321   G1CMTask*         _task;
1322   int               _ref_counter_limit;
1323   int               _ref_counter;
1324   bool              _is_serial;
1325 public:
1326   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1327     _cm(cm), _task(task), _is_serial(is_serial),
1328     _ref_counter_limit(G1RefProcDrainInterval) {
1329     assert(_ref_counter_limit > 0, "sanity");
1330     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1331     _ref_counter = _ref_counter_limit;
1332   }
1333 
1334   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1335   virtual void do_oop(      oop* p) { do_oop_work(p); }
1336 
1337   template <class T> void do_oop_work(T* p) {
1338     if (!_cm->has_overflown()) {
1339       _task->deal_with_reference(p);
1340       _ref_counter--;
1341 
1342       if (_ref_counter == 0) {
1343         // We have dealt with _ref_counter_limit references, pushing them
1344         // and objects reachable from them on to the local stack (and
1345         // possibly the global stack). Call G1CMTask::do_marking_step() to
1346         // process these entries.
1347         //
1348         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1349         // there's nothing more to do (i.e. we're done with the entries that
1350         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1351         // above) or we overflow.
1352         //
1353         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1354         // flag while there may still be some work to do. (See the comment at
1355         // the beginning of G1CMTask::do_marking_step() for those conditions -
1356         // one of which is reaching the specified time target.) It is only
1357         // when G1CMTask::do_marking_step() returns without setting the
1358         // has_aborted() flag that the marking step has completed.
1359         do {
1360           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1361           _task->do_marking_step(mark_step_duration_ms,
1362                                  false      /* do_termination */,
1363                                  _is_serial);
1364         } while (_task->has_aborted() && !_cm->has_overflown());
1365         _ref_counter = _ref_counter_limit;
1366       }
1367     }
1368   }
1369 };
1370 
1371 // 'Drain' oop closure used by both serial and parallel reference processing.
1372 // Uses the G1CMTask associated with a given worker thread (for serial
1373 // reference processing the G1CMTask for worker 0 is used). Calls the
1374 // do_marking_step routine, with an unbelievably large timeout value,
1375 // to drain the marking data structures of the remaining entries
1376 // added by the 'keep alive' oop closure above.
1377 
1378 class G1CMDrainMarkingStackClosure : public VoidClosure {
1379   G1ConcurrentMark* _cm;
1380   G1CMTask*         _task;
1381   bool              _is_serial;
1382  public:
1383   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1384     _cm(cm), _task(task), _is_serial(is_serial) {
1385     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1386   }
1387 
1388   void do_void() {
1389     do {
1390       // We call G1CMTask::do_marking_step() to completely drain the local
1391       // and global marking stacks of entries pushed by the 'keep alive'
1392       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1393       //
1394       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1395       // if there's nothing more to do (i.e. we've completely drained the
1396       // entries that were pushed as a result of applying the 'keep alive'
1397       // closure to the entries on the discovered ref lists) or we overflow
1398       // the global marking stack.
1399       //
1400       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1401       // flag while there may still be some work to do. (See the comment at
1402       // the beginning of G1CMTask::do_marking_step() for those conditions -
1403       // one of which is reaching the specified time target.) It is only
1404       // when G1CMTask::do_marking_step() returns without setting the
1405       // has_aborted() flag that the marking step has completed.
1406 
1407       _task->do_marking_step(1000000000.0 /* something very large */,
1408                              true         /* do_termination */,
1409                              _is_serial);
1410     } while (_task->has_aborted() && !_cm->has_overflown());
1411   }
1412 };
1413 
1414 // Implementation of AbstractRefProcTaskExecutor for parallel
1415 // reference processing at the end of G1 concurrent marking
1416 
1417 class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
1418 private:
1419   G1CollectedHeap*  _g1h;
1420   G1ConcurrentMark* _cm;
1421   WorkGang*         _workers;
1422   uint              _active_workers;
1423 
1424 public:
1425   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1426                           G1ConcurrentMark* cm,
1427                           WorkGang* workers,
1428                           uint n_workers) :
1429     _g1h(g1h), _cm(cm),
1430     _workers(workers), _active_workers(n_workers) { }
1431 
1432   // Executes the given task using concurrent marking worker threads.
1433   virtual void execute(ProcessTask& task);
1434   virtual void execute(EnqueueTask& task);
1435 };
1436 
1437 class G1CMRefProcTaskProxy : public AbstractGangTask {
1438   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1439   ProcessTask&      _proc_task;
1440   G1CollectedHeap*  _g1h;
1441   G1ConcurrentMark* _cm;
1442 
1443 public:
1444   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1445                        G1CollectedHeap* g1h,
1446                        G1ConcurrentMark* cm) :
1447     AbstractGangTask("Process reference objects in parallel"),
1448     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1449     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1450     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1451   }
1452 
1453   virtual void work(uint worker_id) {
1454     ResourceMark rm;
1455     HandleMark hm;
1456     G1CMTask* task = _cm->task(worker_id);
1457     G1CMIsAliveClosure g1_is_alive(_g1h);
1458     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1459     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1460 
1461     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1462   }
1463 };
1464 
1465 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1466   assert(_workers != NULL, "Need parallel worker threads.");
1467   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1468 
1469   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1470 
1471   // We need to reset the concurrency level before each
1472   // proxy task execution, so that the termination protocol
1473   // and overflow handling in G1CMTask::do_marking_step() knows
1474   // how many workers to wait for.
1475   _cm->set_concurrency(_active_workers);
1476   _workers->run_task(&proc_task_proxy);
1477 }
1478 
1479 class G1CMRefEnqueueTaskProxy : public AbstractGangTask {
1480   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1481   EnqueueTask& _enq_task;
1482 
1483 public:
1484   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1485     AbstractGangTask("Enqueue reference objects in parallel"),
1486     _enq_task(enq_task) { }
1487 
1488   virtual void work(uint worker_id) {
1489     _enq_task.work(worker_id);
1490   }
1491 };
1492 
1493 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1494   assert(_workers != NULL, "Need parallel worker threads.");
1495   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1496 
1497   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1498 
1499   // Not strictly necessary but...
1500   //
1501   // We need to reset the concurrency level before each
1502   // proxy task execution, so that the termination protocol
1503   // and overflow handling in G1CMTask::do_marking_step() knows
1504   // how many workers to wait for.
1505   _cm->set_concurrency(_active_workers);
1506   _workers->run_task(&enq_task_proxy);
1507 }
1508 
1509 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
1510   ResourceMark rm;
1511   HandleMark   hm;
1512 
1513   // Is alive closure.
1514   G1CMIsAliveClosure g1_is_alive(_g1h);
1515 
1516   // Inner scope to exclude the cleaning of the string and symbol
1517   // tables from the displayed time.
1518   {
1519     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1520 
1521     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1522 
1523     // See the comment in G1CollectedHeap::ref_processing_init()
1524     // about how reference processing currently works in G1.
1525 
1526     // Set the soft reference policy
1527     rp->setup_policy(clear_all_soft_refs);
1528     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1529 
1530     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1531     // in serial reference processing. Note these closures are also
1532     // used for serially processing (by the current thread) the
1533     // JNI references during parallel reference processing.
1534     //
1535     // These closures do not need to synchronize with the worker
1536     // threads involved in parallel reference processing as these
1537     // instances are executed serially by the current thread (e.g.
1538     // reference processing is not multi-threaded and is thus
1539     // performed by the current thread instead of a gang worker).
1540     //
1541     // The gang tasks involved in parallel reference processing create
1542     // their own instances of these closures, which do their own
1543     // synchronization among themselves.
1544     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1545     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1546 
1547     // We need at least one active thread. If reference processing
1548     // is not multi-threaded we use the current (VMThread) thread;
1549     // otherwise we use the work gang from the G1CollectedHeap and
1550     // we utilize all the worker threads we can.
1551     bool processing_is_mt = rp->processing_is_mt();
1552     uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
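         // Clamp to the number of marking tasks we have, but use at least one worker.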
1553     active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
1554 
1555     // Parallel processing task executor.
1556     G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
1557                                               _g1h->workers(), active_workers);
1558     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1559 
1560     // Set the concurrency level. The phase was already set prior to
1561     // executing the remark task.
1562     set_concurrency(active_workers);
1563 
1564     // Set the degree of MT processing here.  If the discovery was done MT,
1565     // the number of threads involved during discovery could differ from
1566     // the number of active workers.  This is OK as long as the discovered
1567     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1568     rp->set_active_mt_degree(active_workers);
1569 
1570     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1571 
1572     // Process the weak references.
1573     const ReferenceProcessorStats& stats =
1574         rp->process_discovered_references(&g1_is_alive,
1575                                           &g1_keep_alive,
1576                                           &g1_drain_mark_stack,
1577                                           executor,
1578                                           &pt);
1579     _gc_tracer_cm->report_gc_reference_stats(stats);
1580     pt.print_all_references();
1581 
1582     // The do_oop work routines of the keep_alive and drain_marking_stack
1583     // oop closures will set the has_overflown flag if we overflow the
1584     // global marking stack.
1585 
1586     assert(has_overflown() || _global_mark_stack.is_empty(),
1587            "Mark stack should be empty (unless it has overflown)");
1588 
1589     assert(rp->num_q() == active_workers, "why not");
1590 
1591     rp->enqueue_discovered_references(executor, &pt);
1592 
1593     rp->verify_no_references_recorded();
1594 
1595     pt.print_enqueue_phase();
1596 
1597     assert(!rp->discovery_enabled(), "Post condition");
1598   }
1599 
1600   assert(has_overflown() || _global_mark_stack.is_empty(),
1601          "Mark stack should be empty (unless it has overflown)");
1602 
1603   {
1604     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1605     WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
1606   }
1607 
1608   if (has_overflown()) {
1609     // We cannot trust g1_is_alive if the marking stack overflowed.
1610     return;
1611   }
1612 
1613   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1614 
1615   // Unload Klasses, String, Symbols, Code Cache, etc.
1616   if (ClassUnloadingWithConcurrentMark) {
1617     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1618     bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1619     _g1h->complete_cleaning(&g1_is_alive, purged_classes);
1620   } else {
1621     GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1622     // No need to clean string table and symbol table as they are treated as strong roots when
1623     // class unloading is disabled.
1624     _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1625   }
1626 }
1627 
1628 void G1ConcurrentMark::report_object_count() {
1629   G1CMIsAliveClosure is_alive(_g1h);
1630   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1631 }
1632 
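     // Swap the roles of the two mark bitmaps: the bitmap just used for marking
     // becomes the prev bitmap, and the old prev bitmap will be cleared and reused
     // as the next bitmap for the following marking cycle.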
1633 void G1ConcurrentMark::swap_mark_bitmaps() {
1634   G1CMBitMap* temp = _prev_mark_bitmap;
1635   _prev_mark_bitmap = _next_mark_bitmap;
1636   _next_mark_bitmap = temp;
1637   _g1h->collector_state()->set_clearing_next_bitmap(true);
1638 }
1639 
1640 // Closure for marking entries in SATB buffers.
1641 class G1CMSATBBufferClosure : public SATBBufferClosure {
1642 private:
1643   G1CMTask* _task;
1644   G1CollectedHeap* _g1h;
1645 
1646   // This is very similar to G1CMTask::deal_with_reference, but with
1647   // more relaxed requirements for the argument, so this must be more
1648   // circumspect about treating the argument as an object.
1649   void do_entry(void* entry) const {
1650     _task->increment_refs_reached();
1651     oop const obj = static_cast<oop>(entry);
1652     _task->make_reference_grey(obj);
1653   }
1654 
1655 public:
1656   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1657     : _task(task), _g1h(g1h) { }
1658 
1659   virtual void do_buffer(void** buffer, size_t size) {
1660     for (size_t i = 0; i < size; ++i) {
1661       do_entry(buffer[i]);
1662     }
1663   }
1664 };
1665 
1666 class G1RemarkThreadsClosure : public ThreadClosure {
1667   G1CMSATBBufferClosure _cm_satb_cl;
1668   G1CMOopClosure _cm_cl;
1669   MarkingCodeBlobClosure _code_cl;
1670   int _thread_parity;
1671 
1672  public:
1673   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1674     _cm_satb_cl(task, g1h),
1675     _cm_cl(g1h, task),
1676     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1677     _thread_parity(Threads::thread_claim_parity()) {}
1678 
1679   void do_thread(Thread* thread) {
1680     if (thread->is_Java_thread()) {
1681       if (thread->claim_oops_do(true, _thread_parity)) {
1682         JavaThread* jt = (JavaThread*)thread;
1683 
1684         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1685         // however, the oops reachable from nmethods have very complex lifecycles:
1686         // * Alive if on the stack of an executing method
1687         // * Weakly reachable otherwise
1688         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1689         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1690         jt->nmethods_do(&_code_cl);
1691 
1692         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1693       }
1694     } else if (thread->is_VM_thread()) {
1695       if (thread->claim_oops_do(true, _thread_parity)) {
1696         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1697       }
1698     }
1699   }
1700 };
1701 
1702 class G1CMRemarkTask : public AbstractGangTask {
1703   G1ConcurrentMark* _cm;
1704 public:
1705   void work(uint worker_id) {
1706     G1CMTask* task = _cm->task(worker_id);
1707     task->record_start_time();
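         // Scan the per-thread roots first: each Java thread's nmethods and SATB
         // buffer, plus the shared SATB queue (processed when visiting the VM thread).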
1708     {
1709       ResourceMark rm;
1710       HandleMark hm;
1711 
1712       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1713       Threads::threads_do(&threads_f);
1714     }
1715 
1716     do {
1717       task->do_marking_step(1000000000.0 /* something very large */,
1718                             true         /* do_termination       */,
1719                             false        /* is_serial            */);
1720     } while (task->has_aborted() && !_cm->has_overflown());
1721     // If we overflow, then we do not want to restart. We instead
1722     // want to abort remark and do concurrent marking again.
1723     task->record_end_time();
1724   }
1725 
1726   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1727     AbstractGangTask("Par Remark"), _cm(cm) {
1728     _cm->terminator()->reset_for_reuse(active_workers);
1729   }
1730 };
1731 
1732 void G1ConcurrentMark::finalize_marking() {
1733   ResourceMark rm;
1734   HandleMark   hm;
1735 
1736   _g1h->ensure_parsability(false);
1737 
1738   // this is remark, so we'll use up all active threads
1739   uint active_workers = _g1h->workers()->active_workers();
1740   set_concurrency_and_phase(active_workers, false /* concurrent */);
1741   // Leave _parallel_marking_threads at its
1742   // value originally calculated in the G1ConcurrentMark
1743   // constructor and pass the values of the active workers
1744   // through the gang in the task.
1745 
1746   {
1747     StrongRootsScope srs(active_workers);
1748 
1749     G1CMRemarkTask remarkTask(this, active_workers);
1750     // We will start all available threads, even if we decide that the
1751     // active_workers will be fewer. The extra ones will just bail out
1752     // immediately.
1753     _g1h->workers()->run_task(&remarkTask);
1754   }
1755 
1756   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1757   guarantee(has_overflown() ||
1758             satb_mq_set.completed_buffers_num() == 0,
1759             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1760             BOOL_TO_STR(has_overflown()),
1761             satb_mq_set.completed_buffers_num());
1762 
1763   print_stats();
1764 }
1765 
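     // Flush each task's region mark stats cache into the global mark stats and log
     // the aggregate hit/miss ratio of these caches.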
1766 void G1ConcurrentMark::flush_all_task_caches() {
1767   size_t hits = 0;
1768   size_t misses = 0;
1769   for (uint i = 0; i < _max_num_tasks; i++) {
1770     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1771     hits += stats.first;
1772     misses += stats.second;
1773   }
1774   size_t sum = hits + misses;
1775   log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
1776                        hits, misses, percent_of(hits, sum));
1777 }
1778 
1779 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1780   _prev_mark_bitmap->clear_range(mr);
1781 }
1782 
1783 HeapRegion*
1784 G1ConcurrentMark::claim_region(uint worker_id) {
1785   // "checkpoint" the finger
1786   HeapWord* finger = _finger;
1787 
1788   while (finger < _heap.end()) {
1789     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1790 
1791     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1792     // Make sure that the reads below do not float before loading curr_region.
1793     OrderAccess::loadload();
1794     // The call to heap_region_containing() above may return NULL as we always scan
1795     // and claim regions up to the end of the heap. In this case, just jump to the next region.
1796     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1797 
1798     // Is the gap between reading the finger and doing the CAS too long?
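         // Try to atomically move the global finger past the claimed range. If some
         // other worker moved it first, re-read the finger and retry.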
1799     HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
1800     if (res == finger && curr_region != NULL) {
1801       // we succeeded
1802       HeapWord*   bottom        = curr_region->bottom();
1803       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1804 
1805       // Notice that _finger == end cannot be guaranteed here, since
1806       // someone else might have moved the finger even further.
1807       assert(_finger >= end, "the finger should have moved forward");
1808 
1809       if (limit > bottom) {
1810         return curr_region;
1811       } else {
1812         assert(limit == bottom,
1813                "the region limit should be at bottom");
1814         // we return NULL and the caller should try calling
1815         // claim_region() again.
1816         return NULL;
1817       }
1818     } else {
1819       assert(_finger > finger, "the finger should have moved forward");
1820       // read it again
1821       finger = _finger;
1822     }
1823   }
1824 
1825   return NULL;
1826 }
1827 
1828 #ifndef PRODUCT
1829 class VerifyNoCSetOops {
1830   G1CollectedHeap* _g1h;
1831   const char* _phase;
1832   int _info;
1833 
1834 public:
1835   VerifyNoCSetOops(const char* phase, int info = -1) :
1836     _g1h(G1CollectedHeap::heap()),
1837     _phase(phase),
1838     _info(info)
1839   { }
1840 
1841   void operator()(G1TaskQueueEntry task_entry) const {
1842     if (task_entry.is_array_slice()) {
1843       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1844       return;
1845     }
1846     guarantee(oopDesc::is_oop(task_entry.obj()),
1847               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1848               p2i(task_entry.obj()), _phase, _info);
1849     guarantee(!_g1h->is_in_cset(task_entry.obj()),
1850               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1851               p2i(task_entry.obj()), _phase, _info);
1852   }
1853 };
1854 
1855 void G1ConcurrentMark::verify_no_cset_oops() {
1856   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1857   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
1858     return;
1859   }
1860 
1861   // Verify entries on the global mark stack
1862   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1863 
1864   // Verify entries on the task queues
1865   for (uint i = 0; i < _max_num_tasks; ++i) {
1866     G1CMTaskQueue* queue = _task_queues->queue(i);
1867     queue->iterate(VerifyNoCSetOops("Queue", i));
1868   }
1869 
1870   // Verify the global finger
1871   HeapWord* global_finger = finger();
1872   if (global_finger != NULL && global_finger < _heap.end()) {
1873     // Since we always iterate over all regions, we might get a NULL HeapRegion
1874     // here.
1875     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1876     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1877               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1878               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1879   }
1880 
1881   // Verify the task fingers
1882   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1883   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1884     G1CMTask* task = _tasks[i];
1885     HeapWord* task_finger = task->finger();
1886     if (task_finger != NULL && task_finger < _heap.end()) {
1887       // See above note on the global finger verification.
1888       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
1889       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
1890                 !task_hr->in_collection_set(),
1891                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1892                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
1893     }
1894   }
1895 }
1896 #endif // PRODUCT
1897 
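     // Rebuild the remembered sets concurrently, using the concurrent marking workers.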
1898 void G1ConcurrentMark::rebuild_rem_set_concurrently() {
1899   _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset);
1900 }
1901 
1902 void G1ConcurrentMark::print_stats() {
1903   if (!log_is_enabled(Debug, gc, stats)) {
1904     return;
1905   }
1906   log_debug(gc, stats)("---------------------------------------------------------------------");
1907   for (size_t i = 0; i < _num_active_tasks; ++i) {
1908     _tasks[i]->print_stats();
1909     log_debug(gc, stats)("---------------------------------------------------------------------");
1910   }
1911 }
1912 
1913 void G1ConcurrentMark::concurrent_cycle_abort() {
1914   if (!cm_thread()->during_cycle() || _has_aborted) {
1915     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
1916     return;
1917   }
1918 
1919   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
1920   // concurrent bitmap clearing.
1921   {
1922     GCTraceTime(Debug, gc)("Clear Next Bitmap");
1923     clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
1924   }
1925   // Note we cannot clear the previous marking bitmap here
1926   // since VerifyDuringGC verifies the objects marked during
1927   // a full GC against the previous bitmap.
1928 
1929   // Empty mark stack
1930   reset_marking_for_restart();
1931   for (uint i = 0; i < _max_num_tasks; ++i) {
1932     _tasks[i]->clear_region_fields();
1933   }
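       // Abort the overflow barrier syncs so that tasks waiting at them are released
       // instead of blocking forever.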
1934   _first_overflow_barrier_sync.abort();
1935   _second_overflow_barrier_sync.abort();
1936   _has_aborted = true;
1937 
1938   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1939   satb_mq_set.abandon_partial_marking();
1940   // This can be called either during or outside marking, we'll read
1941   // the expected_active value from the SATB queue set.
1942   satb_mq_set.set_active_all_threads(
1943                                  false, /* new active value */
1944                                  satb_mq_set.is_active() /* expected_active */);
1945 }
1946 
1947 static void print_ms_time_info(const char* prefix, const char* name,
1948                                NumberSeq& ns) {
1949   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
1950                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
1951   if (ns.num() > 0) {
1952     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
1953                            prefix, ns.sd(), ns.maximum());
1954   }
1955 }
1956 
1957 void G1ConcurrentMark::print_summary_info() {
1958   Log(gc, marking) log;
1959   if (!log.is_trace()) {
1960     return;
1961   }
1962 
1963   log.trace(" Concurrent marking:");
1964   print_ms_time_info("  ", "init marks", _init_times);
1965   print_ms_time_info("  ", "remarks", _remark_times);
1966   {
1967     print_ms_time_info("     ", "final marks", _remark_mark_times);
1968     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
1969 
1970   }
1971   print_ms_time_info("  ", "cleanups", _cleanup_times);
1972   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
1973             _total_cleanup_time, (_cleanup_times.num() > 0 ? _total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
1974   log.trace("  Total stop_world time = %8.2f s.",
1975             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
1976   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
1977             cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
1978 }
1979 
1980 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
1981   _concurrent_workers->print_worker_threads_on(st);
1982 }
1983 
1984 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
1985   _concurrent_workers->threads_do(tc);
1986 }
1987 
1988 void G1ConcurrentMark::print_on_error(outputStream* st) const {
1989   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
1990                p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
1991   _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
1992   _next_mark_bitmap->print_on_error(st, " Next Bits: ");
1993 }
1994 
1995 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
1996   ReferenceProcessor* result = g1h->ref_processor_cm();
1997   assert(result != NULL, "CM reference processor should not be NULL");
1998   return result;
1999 }
2000 
2001 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2002                                G1CMTask* task)
2003   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2004     _g1h(g1h), _task(task)
2005 { }
2006 
2007 void G1CMTask::setup_for_region(HeapRegion* hr) {
2008   assert(hr != NULL,
2009         "claim_region() should have filtered out NULL regions");
2010   _curr_region  = hr;
2011   _finger       = hr->bottom();
2012   update_region_limit();
2013 }
2014 
2015 void G1CMTask::update_region_limit() {
2016   HeapRegion* hr            = _curr_region;
2017   HeapWord* bottom          = hr->bottom();
2018   HeapWord* limit           = hr->next_top_at_mark_start();
2019 
2020   if (limit == bottom) {
2021     // The region was collected underneath our feet.
2022     // We set the finger to bottom to ensure that the bitmap
2023     // iteration that will follow this will not do anything.
2024     // (This is not a condition that holds when we set the region up,
2025     // as the region is not supposed to be empty in the first place.)
2026     _finger = bottom;
2027   } else if (limit >= _region_limit) {
2028     assert(limit >= _finger, "peace of mind");
2029   } else {
2030     assert(limit < _region_limit, "only way to get here");
2031     // This can happen under some pretty unusual circumstances.  An
2032     // evacuation pause empties the region underneath our feet (NTAMS
2033     // at bottom). We then do some allocation in the region (NTAMS
2034     // stays at bottom), followed by the region being used as a GC
2035     // alloc region (NTAMS will move to top() and the objects
2036     // originally below it will be grayed). All objects now marked in
2037     // the region are explicitly grayed, if below the global finger,
2038     // and in fact we do not need to scan anything else. So, we simply
2039     // set _finger to limit to ensure that the bitmap iteration
2040     // doesn't do anything.
2041     _finger = limit;
2042   }
2043 
2044   _region_limit = limit;
2045 }
2046 
2047 void G1CMTask::giveup_current_region() {
2048   assert(_curr_region != NULL, "invariant");
2049   clear_region_fields();
2050 }
2051 
2052 void G1CMTask::clear_region_fields() {
2053   // Set these three fields to values that indicate that we're not
2054   // holding on to a region.
2055   _curr_region   = NULL;
2056   _finger        = NULL;
2057   _region_limit  = NULL;
2058 }
2059 
2060 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2061   if (cm_oop_closure == NULL) {
2062     assert(_cm_oop_closure != NULL, "invariant");
2063   } else {
2064     assert(_cm_oop_closure == NULL, "invariant");
2065   }
2066   _cm_oop_closure = cm_oop_closure;
2067 }
2068 
2069 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2070   guarantee(next_mark_bitmap != NULL, "invariant");
2071   _next_mark_bitmap              = next_mark_bitmap;
2072   clear_region_fields();
2073 
2074   _calls                         = 0;
2075   _elapsed_time_ms               = 0.0;
2076   _termination_time_ms           = 0.0;
2077   _termination_start_time_ms     = 0.0;
2078 
2079   _mark_stats_cache.reset();
2080 }
2081 
2082 bool G1CMTask::should_exit_termination() {
2083   regular_clock_call();
2084   // This is called when we are in the termination protocol. We should
2085   // quit if, for some reason, this task wants to abort or the global
2086   // stack is not empty (this means that we can get work from it).
2087   return !_cm->mark_stack_empty() || has_aborted();
2088 }
2089 
2090 void G1CMTask::reached_limit() {
2091   assert(_words_scanned >= _words_scanned_limit ||
2092          _refs_reached >= _refs_reached_limit ,
2093          "shouldn't have been called otherwise");
2094   regular_clock_call();
2095 }
2096 
2097 void G1CMTask::regular_clock_call() {
2098   if (has_aborted()) {
2099     return;
2100   }
2101 
2102   // First, we need to recalculate the words scanned and refs reached
2103   // limits for the next clock call.
2104   recalculate_limits();
2105 
2106   // During the regular clock call we do the following:
2107 
2108   // (1) If an overflow has been flagged, then we abort.
2109   if (_cm->has_overflown()) {
2110     set_has_aborted();
2111     return;
2112   }
2113 
2114   // If we are not concurrent (i.e. we're doing remark) we don't need
2115   // to check anything else. The other steps are only needed during
2116   // the concurrent marking phase.
2117   if (!_cm->concurrent()) {
2118     return;
2119   }
2120 
2121   // (2) If marking has been aborted for Full GC, then we also abort.
2122   if (_cm->has_aborted()) {
2123     set_has_aborted();
2124     return;
2125   }
2126 
2127   double curr_time_ms = os::elapsedVTime() * 1000.0;
2128 
2129   // (3) We check whether we should yield. If we have to, then we abort.
2130   if (SuspendibleThreadSet::should_yield()) {
2131     // We should yield. To do this we abort the task. The caller is
2132     // responsible for yielding.
2133     set_has_aborted();
2134     return;
2135   }
2136 
2137   // (4) We check whether we've reached our time quota. If we have,
2138   // then we abort.
2139   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2140   if (elapsed_time_ms > _time_target_ms) {
2141     set_has_aborted();
2142     _has_timed_out = true;
2143     return;
2144   }
2145 
2146   // (5) Finally, we check whether there are enough completed SATB
2147   // buffers available for processing. If there are, we abort.
2148   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2149   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2150     // We do need to process SATB buffers, so we'll abort and restart
2151     // the marking task to do so.
2152     set_has_aborted();
2153     return;
2154   }
2155 }
2156 
2157 void G1CMTask::recalculate_limits() {
2158   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2159   _words_scanned_limit      = _real_words_scanned_limit;
2160 
2161   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2162   _refs_reached_limit       = _real_refs_reached_limit;
2163 }
2164 
2165 void G1CMTask::decrease_limits() {
2166   // This is called when we believe that we're going to do an infrequent
2167   // operation which will increase the per byte scanned cost (i.e. move
2168   // entries to/from the global stack). It basically tries to decrease the
2169   // scanning limit so that the clock is called earlier.
2170 
2171   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2172   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2173 }
2174 
2175 void G1CMTask::move_entries_to_global_stack() {
2176   // Local array where we'll store the entries that will be popped
2177   // from the local queue.
2178   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2179 
2180   size_t n = 0;
2181   G1TaskQueueEntry task_entry;
2182   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2183     buffer[n] = task_entry;
2184     ++n;
2185   }
2186   if (n < G1CMMarkStack::EntriesPerChunk) {
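         // Terminate a partially filled chunk with a null entry; consumers stop at it.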
2187     buffer[n] = G1TaskQueueEntry();
2188   }
2189 
2190   if (n > 0) {
2191     if (!_cm->mark_stack_push(buffer)) {
2192       set_has_aborted();
2193     }
2194   }
2195 
2196   // This operation was quite expensive, so decrease the limits.
2197   decrease_limits();
2198 }
2199 
2200 bool G1CMTask::get_entries_from_global_stack() {
2201   // Local array where we'll store the entries that will be popped
2202   // from the global stack.
2203   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2204 
2205   if (!_cm->mark_stack_pop(buffer)) {
2206     return false;
2207   }
2208 
2209   // We did actually pop at least one entry.
2210   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2211     G1TaskQueueEntry task_entry = buffer[i];
2212     if (task_entry.is_null()) {
2213       break;
2214     }
2215     assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2216     bool success = _task_queue->push(task_entry);
2217     // We only call this when the local queue is empty or under a
2218     // given target limit. So, we do not expect this push to fail.
2219     assert(success, "invariant");
2220   }
2221 
2222   // This operation was quite expensive, so decrease the limits
2223   decrease_limits();
2224   return true;
2225 }
2226 
2227 void G1CMTask::drain_local_queue(bool partially) {
2228   if (has_aborted()) {
2229     return;
2230   }
2231 
2232   // Decide what the target size is, depending on whether we're going to
2233   // drain it partially (so that other tasks can steal if they run out
2234   // of things to do) or totally (at the very end).
2235   size_t target_size;
2236   if (partially) {
2237     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2238   } else {
2239     target_size = 0;
2240   }
2241 
2242   if (_task_queue->size() > target_size) {
2243     G1TaskQueueEntry entry;
2244     bool ret = _task_queue->pop_local(entry);
2245     while (ret) {
2246       scan_task_entry(entry);
2247       if (_task_queue->size() <= target_size || has_aborted()) {
2248         ret = false;
2249       } else {
2250         ret = _task_queue->pop_local(entry);
2251       }
2252     }
2253   }
2254 }
2255 
2256 void G1CMTask::drain_global_stack(bool partially) {
2257   if (has_aborted()) {
2258     return;
2259   }
2260 
2261   // We have a policy to drain the local queue before we attempt to
2262   // drain the global stack.
2263   assert(partially || _task_queue->size() == 0, "invariant");
2264 
2265   // Decide what the target size is, depending on whether we're going to
2266   // drain it partially (so that other tasks can steal if they run out
2267   // of things to do) or totally (at the very end).
2268   // Notice that when draining the global mark stack partially, due to the raciness
2269   // of the mark stack size update we might in fact drop below the target. But,
2270   // this is not a problem.
2271   // In case of total draining, we simply process until the global mark stack is
2272   // totally empty, disregarding the size counter.
2273   if (partially) {
2274     size_t const target_size = _cm->partial_mark_stack_size_target();
2275     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2276       if (get_entries_from_global_stack()) {
2277         drain_local_queue(partially);
2278       }
2279     }
2280   } else {
2281     while (!has_aborted() && get_entries_from_global_stack()) {
2282       drain_local_queue(partially);
2283     }
2284   }
2285 }
2286 
2287 // The SATB queue code makes several assumptions about whether to call the par or
2288 // non-par versions of the methods. This is why some of the code is
2289 // replicated. We should really get rid of the single-threaded version
2290 // of the code to simplify things.
2291 void G1CMTask::drain_satb_buffers() {
2292   if (has_aborted()) {
2293     return;
2294   }
2295 
2296   // We set this so that the regular clock knows that we're in the
2297   // middle of draining buffers and doesn't set the abort flag when it
2298   // notices that SATB buffers are available for draining. It'd be
2299   // very counterproductive if it did that. :-)
2300   _draining_satb_buffers = true;
2301 
2302   G1CMSATBBufferClosure satb_cl(this, _g1h);
2303   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2304 
2305   // This keeps claiming and applying the closure to completed buffers
2306   // until we run out of buffers or we need to abort.
2307   while (!has_aborted() &&
2308          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2309     regular_clock_call();
2310   }
2311 
2312   _draining_satb_buffers = false;
2313 
2314   assert(has_aborted() ||
2315          _cm->concurrent() ||
2316          satb_mq_set.completed_buffers_num() == 0, "invariant");
2317 
2318   // Again, this was a potentially expensive operation, so decrease the
2319   // limits to get the regular clock call early.
2320   decrease_limits();
2321 }
2322 
2323 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2324   _mark_stats_cache.reset(region_idx);
2325 }
2326 
2327 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2328   return _mark_stats_cache.evict_all();
2329 }
2330 
2331 void G1CMTask::print_stats() {
2332   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2333   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2334                        _elapsed_time_ms, _termination_time_ms);
2335   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2336                        _step_times_ms.num(),
2337                        _step_times_ms.avg(),
2338                        _step_times_ms.sd(),
2339                        _step_times_ms.maximum(),
2340                        _step_times_ms.sum());
2341   size_t const hits = _mark_stats_cache.hits();
2342   size_t const misses = _mark_stats_cache.misses();
2343   log_debug(gc, stats)("  Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f",
2344                        hits, misses, percent_of(hits, hits + misses));
2345 }
2346 
2347 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2348   return _task_queues->steal(worker_id, hash_seed, task_entry);
2349 }
2350 
2351 /*****************************************************************************
2352 
2353     The do_marking_step(time_target_ms, ...) method is the building
2354     block of the parallel marking framework. It can be called in parallel
2355     with other invocations of do_marking_step() on different tasks
2356     (but only one per task, obviously) and concurrently with the
2357     mutator threads, or during remark, hence it eliminates the need
2358     for two versions of the code. When called during remark, it will
2359     pick up from where the task left off during the concurrent marking
2360     phase. Interestingly, tasks are also claimable during evacuation
2361     pauses, since do_marking_step() ensures that it aborts before
2362     it needs to yield.
2363 
2364     The data structures that it uses to do marking work are the
2365     following:
2366 
2367       (1) Marking Bitmap. If there are gray objects that appear only
2368       on the bitmap (this happens either when dealing with an overflow
2369       or when the initial marking phase has simply marked the roots
2370       and didn't push them on the stack), then tasks claim heap
2371       regions whose bitmap they then scan to find gray objects. A
2372       global finger indicates where the end of the last claimed region
2373       is. A local finger indicates how far into the region a task has
2374       scanned. The two fingers are used to determine how to gray an
2375       object (i.e. whether simply marking it is OK, as it will be
2376       visited by a task in the future, or whether it needs to be also
2377       pushed on a stack).
2378 
2379       (2) Local Queue. The local queue of the task which is accessed
2380       reasonably efficiently by the task. Other tasks can steal from
2381       it when they run out of work. Throughout the marking phase, a
2382       task attempts to keep its local queue short but not totally
2383       empty, so that entries are available for stealing by other
2384       tasks. Only when there is no more work, a task will totally
2385       drain its local queue.
2386 
2387       (3) Global Mark Stack. This handles local queue overflow. During
2388       marking only sets of entries are moved between it and the local
2389       queues, as access to it requires a mutex and more fine-grained
2390       interaction with it, which might cause contention. If it
2391       overflows, then the marking phase should restart and iterate
2392       over the bitmap to identify gray objects. Throughout the marking
2393       phase, tasks attempt to keep the global mark stack at a small
2394       length but not totally empty, so that entries are available for
2395       popping by other tasks. Only when there is no more work, tasks
2396       will totally drain the global mark stack.
2397 
2398       (4) SATB Buffer Queue. This is where completed SATB buffers are
2399       made available. Buffers are regularly removed from this queue
2400       and scanned for roots, so that the queue doesn't get too
2401       long. During remark, all completed buffers are processed, as
2402       well as the filled in parts of any uncompleted buffers.
2403 
2404     The do_marking_step() method tries to abort when the time target
2405     has been reached. There are a few other cases when the
2406     do_marking_step() method also aborts:
2407 
2408       (1) When the marking phase has been aborted (after a Full GC).
2409 
2410       (2) When a global overflow (on the global stack) has been
2411       triggered. Before the task aborts, it will actually sync up with
2412       the other tasks to ensure that all the marking data structures
2413       (local queues, stacks, fingers etc.)  are re-initialized so that
2414       when do_marking_step() completes, the marking phase can
2415       immediately restart.
2416 
2417       (3) When enough completed SATB buffers are available. The
2418       do_marking_step() method only tries to drain SATB buffers right
2419       at the beginning. So, if enough buffers are available, the
2420       marking step aborts and the SATB buffers are processed at
2421       the beginning of the next invocation.
2422 
2423       (4) To yield. When we have to yield, we abort and yield
2424       right at the end of do_marking_step(). This saves us from a lot
2425       of hassle as, by yielding we might allow a Full GC. If this
2426       happens then objects will be compacted underneath our feet, the
2427       heap might shrink, etc. We save checking for this by just
2428       aborting and doing the yield right at the end.
2429 
2430     From the above it follows that the do_marking_step() method should
2431     be called in a loop (or, otherwise, regularly) until it completes.
2432 
2433     If a marking step completes without its has_aborted() flag being
2434     true, it means it has completed the current marking phase (and
2435     also all other marking tasks have done so and have all synced up).
2436 
2437     A method called regular_clock_call() is invoked "regularly" (in
2438     sub ms intervals) throughout marking. It is this clock method that
2439     checks all the abort conditions which were mentioned above and
2440     decides when the task should abort. A work-based scheme is used to
2441     trigger this clock method: when the number of object words the
2442     marking phase has scanned or the number of references the marking
2443     phase has visited reaches a given limit. Additional invocations of
2444     the clock method have been planted in a few other strategic places
2445     too. The initial reason for the clock method was to avoid calling
2446     vtime too regularly, as it is quite expensive. So, once it was in
2447     place, it was natural to piggy-back all the other conditions on it
2448     too and not constantly check them throughout the code.
2449 
2450     If do_termination is true then do_marking_step will enter its
2451     termination protocol.
2452 
2453     The value of is_serial must be true when do_marking_step is being
2454     called serially (i.e. by the VMThread) and do_marking_step should
2455     skip any synchronization in the termination and overflow code.
2456     Examples include the serial remark code and the serial reference
2457     processing closures.
2458 
2459     The value of is_serial must be false when do_marking_step is
2460     being called by any of the worker threads in a work gang.
2461     Examples include the concurrent marking code (CMMarkingTask),
2462     the MT remark code, and the MT reference processing closures.
2463 
2464  *****************************************************************************/
2465 
2466 void G1CMTask::do_marking_step(double time_target_ms,
2467                                bool do_termination,
2468                                bool is_serial) {
2469   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2470 
2471   _start_time_ms = os::elapsedVTime() * 1000.0;
2472 
2473   // If do_stealing is true then do_marking_step will attempt to
2474   // steal work from the other G1CMTasks. It only makes sense to
2475   // enable stealing when the termination protocol is enabled
2476   // and do_marking_step() is not being called serially.
2477   bool do_stealing = do_termination && !is_serial;
2478 
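       // Shorten the time target by the predicted overshoot of previous marking steps
       // so that, on average, we finish close to the requested target.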
2479   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2480   _time_target_ms = time_target_ms - diff_prediction_ms;
2481 
2482   // set up the variables that are used in the work-based scheme to
2483   // call the regular clock method
2484   _words_scanned = 0;
2485   _refs_reached  = 0;
2486   recalculate_limits();
2487 
2488   // clear all flags
2489   clear_has_aborted();
2490   _has_timed_out = false;
2491   _draining_satb_buffers = false;
2492 
2493   ++_calls;
2494 
2495   // Set up the bitmap and oop closures. Anything that uses them is
2496   // eventually called from this method, so it is OK to allocate these
2497   // statically.
2498   G1CMBitMapClosure bitmap_closure(this, _cm);
2499   G1CMOopClosure cm_oop_closure(_g1h, this);
2500   set_cm_oop_closure(&cm_oop_closure);
2501 
2502   if (_cm->has_overflown()) {
2503     // This can happen if the mark stack overflows during a GC pause
2504     // and this task, after a yield point, restarts. We have to abort
2505     // as we need to get into the overflow protocol which happens
2506     // right at the end of this task.
2507     set_has_aborted();
2508   }
2509 
2510   // First drain any available SATB buffers. After this, we will not
2511   // look at SATB buffers before the next invocation of this method.
2512   // If enough completed SATB buffers are queued up, the regular clock
2513   // will abort this task so that it restarts.
2514   drain_satb_buffers();
2515   // ...then partially drain the local queue and the global stack
2516   drain_local_queue(true);
2517   drain_global_stack(true);
2518 
2519   do {
2520     if (!has_aborted() && _curr_region != NULL) {
2521       // This means that we're already holding on to a region.
2522       assert(_finger != NULL, "if region is not NULL, then the finger "
2523              "should not be NULL either");
2524 
2525       // We might have restarted this task after an evacuation pause
2526       // which might have evacuated the region we're holding on to
2527       // underneath our feet. Let's read its limit again to make sure
2528       // that we do not iterate over a region of the heap that
2529       // contains garbage (update_region_limit() will also move
2530       // _finger to the start of the region if it is found empty).
2531       update_region_limit();
2532       // We will start from _finger not from the start of the region,
2533       // as we might be restarting this task after aborting half-way
2534       // through scanning this region. In this case, _finger points to
2535       // the address where we last found a marked object. If this is a
2536       // fresh region, _finger points to start().
2537       MemRegion mr = MemRegion(_finger, _region_limit);
2538 
2539       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2540              "humongous regions should go around loop once only");
2541 
2542       // Some special cases:
2543       // If the memory region is empty, we can just give up the region.
2544       // If the current region is humongous then we only need to check
2545       // the bitmap for the bit associated with the start of the object,
2546       // scan the object if it's live, and give up the region.
2547       // Otherwise, let's iterate over the bitmap of the part of the region
2548       // that is left.
2549       // If the iteration is successful, give up the region.
2550       if (mr.is_empty()) {
2551         giveup_current_region();
2552         regular_clock_call();
2553       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2554         if (_next_mark_bitmap->is_marked(mr.start())) {
2555           // The object is marked - apply the closure
2556           bitmap_closure.do_addr(mr.start());
2557         }
2558         // Even if this task aborted while scanning the humongous object
2559         // we can (and should) give up the current region.
2560         giveup_current_region();
2561         regular_clock_call();
2562       } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
2563         giveup_current_region();
2564         regular_clock_call();
2565       } else {
2566         assert(has_aborted(), "currently the only way to do so");
2567         // The only way to abort the bitmap iteration is to return
2568         // false from the do_addr() method. However, inside the
2569         // do_addr() method we move the _finger to point to the
2570         // object currently being looked at. So, if we bail out, we
2571         // have definitely set _finger to something non-null.
2572         assert(_finger != NULL, "invariant");
2573 
2574         // Region iteration was actually aborted. So now _finger
2575         // points to the address of the object we last scanned. If we
2576         // leave it there, when we restart this task, we will rescan
2577         // the object. It is easy to avoid this. We move the finger by
2578         // enough to point to the next possible object header.
2579         assert(_finger < _region_limit, "invariant");
2580         HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2581         // Check if bitmap iteration was aborted while scanning the last object
2582         if (new_finger >= _region_limit) {
2583           giveup_current_region();
2584         } else {
2585           move_finger_to(new_finger);
2586         }
2587       }
2588     }
2589     // At this point we have either completed iterating over the
2590     // region we were holding on to, or we have aborted.
2591 
2592     // We then partially drain the local queue and the global stack.
2593     // (Do we really need this?)
2594     drain_local_queue(true);
2595     drain_global_stack(true);
2596 
2597     // Read the note on the claim_region() method on why it might
2598     // return NULL with potentially more regions available for
2599     // claiming and why we have to check out_of_regions() to determine
2600     // whether we're done or not.
2601     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2602       // We are going to try to claim a new region. We should have
2603       // given up on the previous one.
2604       // Separated the asserts so that we know which one fires.
2605       assert(_curr_region  == NULL, "invariant");
2606       assert(_finger       == NULL, "invariant");
2607       assert(_region_limit == NULL, "invariant");
2608       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2609       if (claimed_region != NULL) {
2610         // Yes, we managed to claim one
2611         setup_for_region(claimed_region);
2612         assert(_curr_region == claimed_region, "invariant");
2613       }
2614       // It is important to call the regular clock here. It might take
2615       // a while to claim a region if, for example, we hit a large
2616       // block of empty regions. So we need to call the regular clock
2617       // method once round the loop to make sure it's called
2618       // frequently enough.
2619       regular_clock_call();
2620     }
2621 
2622     if (!has_aborted() && _curr_region == NULL) {
2623       assert(_cm->out_of_regions(),
2624              "at this point we should be out of regions");
2625     }
2626   } while ( _curr_region != NULL && !has_aborted());
2627 
2628   if (!has_aborted()) {
2629     // We cannot check whether the global stack is empty, since other
2630     // tasks might be pushing objects to it concurrently.
2631     assert(_cm->out_of_regions(),
2632            "at this point we should be out of regions");
2633     // Try to reduce the number of available SATB buffers so that
2634     // remark has less work to do.
2635     drain_satb_buffers();
2636   }
2637 
2638   // Since we've done everything else, we can now totally drain the
2639   // local queue and global stack.
2640   drain_local_queue(false);
2641   drain_global_stack(false);
2642 
2643   // Attempt at work stealing from other task's queues.
2644   if (do_stealing && !has_aborted()) {
2645     // We have not aborted. This means that we have finished all that
2646     // we could. Let's try to do some stealing...
2647 
2648     // We cannot check whether the global stack is empty, since other
2649     // tasks might be pushing objects to it concurrently.
2650     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2651            "only way to reach here");
2652     while (!has_aborted()) {
2653       G1TaskQueueEntry entry;
2654       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2655         scan_task_entry(entry);
2656 
2657         // And since we're towards the end, let's totally drain the
2658         // local queue and global stack.
2659         drain_local_queue(false);
2660         drain_global_stack(false);
2661       } else {
2662         break;
2663       }
2664     }
2665   }
2666 
2667   // We still haven't aborted. Now, let's try to get into the
2668   // termination protocol.
2669   if (do_termination && !has_aborted()) {
2670     // We cannot check whether the global stack is empty, since other
2671     // tasks might be concurrently pushing objects on it.
2672     // Separated the asserts so that we know which one fires.
2673     assert(_cm->out_of_regions(), "only way to reach here");
2674     assert(_task_queue->size() == 0, "only way to reach here");
2675     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2676 
2677     // The G1CMTask class also extends the TerminatorTerminator class,
2678     // hence its should_exit_termination() method will also decide
2679     // whether to exit the termination protocol or not.
2680     bool finished = (is_serial ||
2681                      _cm->terminator()->offer_termination(this));
2682     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2683     _termination_time_ms +=
2684       termination_end_time_ms - _termination_start_time_ms;
2685 
2686     if (finished) {
2687       // We're all done.
2688 
2689       // We can now guarantee that the global stack is empty, since
2690       // all other tasks have finished. We separated the guarantees so
2691       // that, if a condition is false, we can immediately find out
2692       // which one.
2693       guarantee(_cm->out_of_regions(), "only way to reach here");
2694       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2695       guarantee(_task_queue->size() == 0, "only way to reach here");
2696       guarantee(!_cm->has_overflown(), "only way to reach here");
2697     } else {
2698       // Apparently there's more work to do. Let's abort this task. Our
2699       // caller will restart it and we can hopefully find more things to do.
2700       set_has_aborted();
2701     }
2702   }
2703 
2704   // Mainly for debugging purposes to make sure that a pointer to the
2705   // closure which was statically allocated in this frame doesn't
2706   // escape it by accident.
2707   set_cm_oop_closure(NULL);
2708   double end_time_ms = os::elapsedVTime() * 1000.0;
2709   double elapsed_time_ms = end_time_ms - _start_time_ms;
2710   // Update the step history.
2711   _step_times_ms.add(elapsed_time_ms);
2712 
2713   if (has_aborted()) {
2714     // The task was aborted for some reason.
2715     if (_has_timed_out) {
2716       double diff_ms = elapsed_time_ms - _time_target_ms;
2717       // Keep statistics of how well we did with respect to hitting
2718       // our target only if we actually timed out (if we aborted for
2719       // other reasons, then the results might get skewed).
2720       _marking_step_diffs_ms.add(diff_ms);
2721     }
2722 
2723     if (_cm->has_overflown()) {
2724       // This is the interesting one. We aborted because a global
2725       // overflow was raised. This means we have to restart the
2726       // marking phase and start iterating over regions. However, in
2727       // order to do this we have to make sure that all tasks stop
2728       // what they are doing and re-initialize in a safe manner. We
2729       // will achieve this with the use of two barrier sync points.
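           // Roughly: the first barrier ensures every task has stopped marking;
           // each task then clears its local region fields and flushes its mark
           // stats cache; in the concurrent case worker 0 resets the global
           // marking state; the second barrier ensures nobody proceeds until
           // that reset is complete.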
2730 
2731       if (!is_serial) {
2732         // We only need to enter the sync barrier if we're being called
2733         // from a parallel context.
2734         _cm->enter_first_sync_barrier(_worker_id);
2735 
2736         // When we exit this sync barrier we know that all tasks have
2737         // stopped doing marking work. So, it's now safe to
2738         // re-initialize our data structures.
2739       }
2740 
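           // Reset this task's region state and push its cached per-region
           // liveness counts into the global mark statistics before restarting.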
2741       clear_region_fields();
2742       flush_mark_stats_cache();
2743 
2744       if (!is_serial) {
2745         // If we're executing the concurrent phase of marking, reset the marking
2746         // state; otherwise the marking state is reset after reference processing,
2747         // during the remark pause.
2748         // If we reset here as a result of an overflow during the remark we will
2749         // see assertion failures from any subsequent set_concurrency_and_phase()
2750         // calls.
2751         if (_cm->concurrent() && _worker_id == 0) {
2752           // Worker 0 is responsible for clearing the global data structures after
2753           // an overflow. During STW we should not clear the overflow flag (in
2754           // G1ConcurrentMark::reset_marking_for_restart()) since we rely on it being true
2755           // when we exit this method to abort the pause and restart concurrent marking.
2756           _cm->reset_marking_for_restart();
2757 
2758           log_info(gc, marking)("Concurrent Mark reset for overflow");
2759         }
2760 
2761         // ...and enter the second barrier.
2762         _cm->enter_second_sync_barrier(_worker_id);
2763       }
2764       // At this point, if we're in the concurrent phase of
2765       // marking, everything has been re-initialized and we're
2766       // ready to restart.
2767     }
2768   }
2769 }
2770 
2771 G1CMTask::G1CMTask(uint worker_id,
2772                    G1ConcurrentMark* cm,
2773                    G1CMTaskQueue* task_queue,
2774                    G1RegionMarkStats* mark_stats,
2775                    uint max_regions) :
2776   _objArray_processor(this),
2777   _worker_id(worker_id),
2778   _g1h(G1CollectedHeap::heap()),
2779   _cm(cm),
2780   _next_mark_bitmap(NULL),
2781   _task_queue(task_queue),
2782   _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize),
2783   _calls(0),
2784   _time_target_ms(0.0),
2785   _start_time_ms(0.0),
2786   _cm_oop_closure(NULL),
2787   _curr_region(NULL),
2788   _finger(NULL),
2789   _region_limit(NULL),
2790   _words_scanned(0),
2791   _words_scanned_limit(0),
2792   _real_words_scanned_limit(0),
2793   _refs_reached(0),
2794   _refs_reached_limit(0),
2795   _real_refs_reached_limit(0),
2796   _hash_seed(17),
2797   _has_aborted(false),
2798   _has_timed_out(false),
2799   _draining_satb_buffers(false),
2800   _step_times_ms(),
2801   _elapsed_time_ms(0.0),
2802   _termination_time_ms(0.0),
2803   _termination_start_time_ms(0.0),
2804   _marking_step_diffs_ms()
2805 {
2806   guarantee(task_queue != NULL, "invariant");
2807 
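       // Seed the step-overshoot statistics with a small value so that the very
       // first prediction of how much a marking step exceeds its time target is
       // non-zero (presumably a conservative default; see do_marking_step()).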
2808   _marking_step_diffs_ms.add(0.5);
2809 }
2810 
2811 // The following macros are used below to ensure consistent output
2812 // formatting. The *_H_* versions are used to format the header for a
2813 // particular value and should be kept consistent with the
2814 // corresponding value macro. Also note that most of the macros add
2815 // the necessary whitespace (as a prefix), which makes them a bit
2816 // easier to compose.
2817 
2818 // All the output lines are prefixed with this string to be able to
2819 // identify them easily in a large log file.
2820 #define G1PPRL_LINE_PREFIX            "###"
2821 
2822 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2823 #ifdef _LP64
2824 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2825 #else // _LP64
2826 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2827 #endif // _LP64
2828 
2829 // For per-region info
2830 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2831 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2832 #define G1PPRL_STATE_FORMAT           "   %-5s"
2833 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2834 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
2835 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2836 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
2837 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
2838 
2839 // For summary info
2840 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2841 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
2842 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2843 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
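     // For illustration, a per-region line produced with the macros above has
     // (schematically) the form:
     // ### <type> <bottom>-<end> <used> <prev-live> <next-live> <gc-eff> <remset> <state> <code-roots>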
2844 
2845 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2846   _total_used_bytes(0), _total_capacity_bytes(0),
2847   _total_prev_live_bytes(0), _total_next_live_bytes(0),
2848   _total_remset_bytes(0), _total_strong_code_roots_bytes(0)
2849 {
2850   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2851   MemRegion g1_reserved = g1h->g1_reserved();
2852   double now = os::elapsedTime();
2853 
2854   // Print the header of the output.
2855   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
2856   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
2857                           G1PPRL_SUM_ADDR_FORMAT("reserved")
2858                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
2859                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
2860                           HeapRegion::GrainBytes);
2861   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2862   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2863                           G1PPRL_TYPE_H_FORMAT
2864                           G1PPRL_ADDR_BASE_H_FORMAT
2865                           G1PPRL_BYTE_H_FORMAT
2866                           G1PPRL_BYTE_H_FORMAT
2867                           G1PPRL_BYTE_H_FORMAT
2868                           G1PPRL_DOUBLE_H_FORMAT
2869                           G1PPRL_BYTE_H_FORMAT
2870                           G1PPRL_STATE_H_FORMAT
2871                           G1PPRL_BYTE_H_FORMAT,
2872                           "type", "address-range",
2873                           "used", "prev-live", "next-live", "gc-eff",
2874                           "remset", "state", "code-roots");
2875   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2876                           G1PPRL_TYPE_H_FORMAT
2877                           G1PPRL_ADDR_BASE_H_FORMAT
2878                           G1PPRL_BYTE_H_FORMAT
2879                           G1PPRL_BYTE_H_FORMAT
2880                           G1PPRL_BYTE_H_FORMAT
2881                           G1PPRL_DOUBLE_H_FORMAT
2882                           G1PPRL_BYTE_H_FORMAT
2883                           G1PPRL_STATE_H_FORMAT
2884                           G1PPRL_BYTE_H_FORMAT,
2885                           "", "",
2886                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
2887                           "(bytes)", "", "(bytes)");
2888 }
2889 
2890 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) {
2891   const char* type       = r->get_type_str();
2892   HeapWord* bottom       = r->bottom();
2893   HeapWord* end          = r->end();
2894   size_t capacity_bytes  = r->capacity();
2895   size_t used_bytes      = r->used();
2896   size_t prev_live_bytes = r->live_bytes();
2897   size_t next_live_bytes = r->next_live_bytes();
2898   double gc_eff          = r->gc_efficiency();
2899   size_t remset_bytes    = r->rem_set()->mem_size();
2900   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
2901   const char* remset_type = r->rem_set()->get_short_state_str();
2902 
2903   _total_used_bytes      += used_bytes;
2904   _total_capacity_bytes  += capacity_bytes;
2905   _total_prev_live_bytes += prev_live_bytes;
2906   _total_next_live_bytes += next_live_bytes;
2907   _total_remset_bytes    += remset_bytes;
2908   _total_strong_code_roots_bytes += strong_code_roots_bytes;
2909 
2910   // Print a line for this particular region.
2911   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2912                           G1PPRL_TYPE_FORMAT
2913                           G1PPRL_ADDR_BASE_FORMAT
2914                           G1PPRL_BYTE_FORMAT
2915                           G1PPRL_BYTE_FORMAT
2916                           G1PPRL_BYTE_FORMAT
2917                           G1PPRL_DOUBLE_FORMAT
2918                           G1PPRL_BYTE_FORMAT
2919                           G1PPRL_STATE_FORMAT
2920                           G1PPRL_BYTE_FORMAT,
2921                           type, p2i(bottom), p2i(end),
2922                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
2923                           remset_bytes, remset_type, strong_code_roots_bytes);
2924 
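       // Returning false keeps the heap region iteration going so that every
       // region gets its own line in the output.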
2925   return false;
2926 }
2927 
2928 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
2929   // Add static memory usage to the remembered set sizes.
2930   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
2931   // Print the footer of the output.
2932   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2933   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2934                          " SUMMARY"
2935                          G1PPRL_SUM_MB_FORMAT("capacity")
2936                          G1PPRL_SUM_MB_PERC_FORMAT("used")
2937                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
2938                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
2939                          G1PPRL_SUM_MB_FORMAT("remset")
2940                          G1PPRL_SUM_MB_FORMAT("code-roots"),
2941                          bytes_to_mb(_total_capacity_bytes),
2942                          bytes_to_mb(_total_used_bytes),
2943                          percent_of(_total_used_bytes, _total_capacity_bytes),
2944                          bytes_to_mb(_total_prev_live_bytes),
2945                          percent_of(_total_prev_live_bytes, _total_capacity_bytes),
2946                          bytes_to_mb(_total_next_live_bytes),
2947                          percent_of(_total_next_live_bytes, _total_capacity_bytes),
2948                          bytes_to_mb(_total_remset_bytes),
2949                          bytes_to_mb(_total_strong_code_roots_bytes));
2950 }