1 /*
   2  * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc/g1/g1CollectorState.hpp"
  31 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  32 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  33 #include "gc/g1/g1HeapVerifier.hpp"
  34 #include "gc/g1/g1OopClosures.inline.hpp"
  35 #include "gc/g1/g1Policy.hpp"
  36 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  37 #include "gc/g1/g1StringDedup.hpp"
  38 #include "gc/g1/heapRegion.inline.hpp"
  39 #include "gc/g1/heapRegionRemSet.hpp"
  40 #include "gc/g1/heapRegionSet.inline.hpp"
  41 #include "gc/shared/adaptiveSizePolicy.hpp"
  42 #include "gc/shared/gcId.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.inline.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/suspendibleThreadSet.hpp"
  50 #include "gc/shared/taskqueue.inline.hpp"
  51 #include "gc/shared/vmGCOperations.hpp"
  52 #include "gc/shared/weakProcessor.hpp"
  53 #include "include/jvm.h"
  54 #include "logging/log.hpp"
  55 #include "memory/allocation.hpp"
  56 #include "memory/resourceArea.hpp"
  57 #include "oops/access.inline.hpp"
  58 #include "oops/oop.inline.hpp"
  59 #include "runtime/atomic.hpp"
  60 #include "runtime/handles.inline.hpp"
  61 #include "runtime/java.hpp"
  62 #include "runtime/prefetch.inline.hpp"
  63 #include "services/memTracker.hpp"
  64 #include "utilities/align.hpp"
  65 #include "utilities/growableArray.hpp"
  66 
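// Called for each marked address found while iterating over a part of the mark
// bitmap: moves the task's local finger to that address, scans the object found
// there and partially drains the task's local queue and the global mark stack.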
  67 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  68   assert(addr < _cm->finger(), "invariant");
  69   assert(addr >= _task->finger(), "invariant");
  70 
  71   // We move that task's local finger along.
  72   _task->move_finger_to(addr);
  73 
  74   _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  75   // we only partially drain the local queue and global stack
  76   _task->drain_local_queue(true);
  77   _task->drain_global_stack(true);
  78 
  79   // if the has_aborted flag has been raised, we need to bail out of
  80   // the iteration
  81   return !_task->has_aborted();
  82 }
  83 
  84 G1CMMarkStack::G1CMMarkStack() :
  85   _max_chunk_capacity(0),
  86   _base(NULL),
  87   _chunk_capacity(0) {
  88   set_empty();
  89 }
  90 
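// Replaces the backing memory of the overflow mark stack with a new mapping of
// new_capacity chunks. May only be called while the stack is empty.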
  91 bool G1CMMarkStack::resize(size_t new_capacity) {
  92   assert(is_empty(), "Only resize when stack is empty.");
  93   assert(new_capacity <= _max_chunk_capacity,
  94          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
  95 
  96   TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);
  97 
  98   if (new_base == NULL) {
  99     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 100     return false;
 101   }
 102   // Release old mapping.
 103   if (_base != NULL) {
 104     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 105   }
 106 
 107   _base = new_base;
 108   _chunk_capacity = new_capacity;
 109   set_empty();
 110 
 111   return true;
 112 }
 113 
 114 size_t G1CMMarkStack::capacity_alignment() {
 115   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 116 }
 117 
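// Sets up the overflow mark stack. The given capacities are in number of
// G1TaskQueueEntry elements and are converted to whole chunks, aligned to the
// allocation granularity.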
 118 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 119   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 120 
 121   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 122 
 123   _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 124   size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 125 
 126   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 127             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 128             _max_chunk_capacity,
 129             initial_chunk_capacity);
 130 
 131   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 132                 initial_chunk_capacity, _max_chunk_capacity);
 133 
 134   return resize(initial_chunk_capacity);
 135 }
 136 
 137 void G1CMMarkStack::expand() {
 138   if (_chunk_capacity == _max_chunk_capacity) {
 139     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 140     return;
 141   }
 142   size_t old_capacity = _chunk_capacity;
 143   // Double capacity if possible
 144   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 145 
 146   if (resize(new_capacity)) {
 147     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 148                   old_capacity, new_capacity);
 149   } else {
 150     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 151                     old_capacity, new_capacity);
 152   }
 153 }
 154 
 155 G1CMMarkStack::~G1CMMarkStack() {
 156   if (_base != NULL) {
 157     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 158   }
 159 }
 160 
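// The chunk list holds chunks with live entries while the free list holds
// recycled chunks. Both are singly-linked lists; the wrappers below protect
// them with MarkStackChunkList_lock and MarkStackFreeList_lock respectively.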
 161 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 162   elem->next = *list;
 163   *list = elem;
 164 }
 165 
 166 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 167   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 168   add_chunk_to_list(&_chunk_list, elem);
 169   _chunks_in_chunk_list++;
 170 }
 171 
 172 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 173   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 174   add_chunk_to_list(&_free_list, elem);
 175 }
 176 
 177 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 178   TaskQueueEntryChunk* result = *list;
 179   if (result != NULL) {
 180     *list = (*list)->next;
 181   }
 182   return result;
 183 }
 184 
 185 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 186   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 187   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 188   if (result != NULL) {
 189     _chunks_in_chunk_list--;
 190   }
 191   return result;
 192 }
 193 
 194 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 195   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 196   return remove_chunk_from_list(&_free_list);
 197 }
 198 
 199 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
 200   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 201   // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 202   // wraparound of _hwm.
 203   if (_hwm >= _chunk_capacity) {
 204     return NULL;
 205   }
 206 
 207   size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
 208   if (cur_idx >= _chunk_capacity) {
 209     return NULL;
 210   }
 211 
 212   TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
 213   result->next = NULL;
 214   return result;
 215 }
 216 
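// Pushes a buffer of EntriesPerChunk entries onto the global mark stack as a
// new chunk, reusing a chunk from the free list if possible. Returns false if
// no chunk could be obtained, i.e. the mark stack is full.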
 217 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 218   // Get a new chunk.
 219   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 220 
 221   if (new_chunk == NULL) {
 222     // Did not get a chunk from the free list. Allocate from backing memory.
 223     new_chunk = allocate_new_chunk();
 224 
 225     if (new_chunk == NULL) {
 226       return false;
 227     }
 228   }
 229 
 230   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 231 
 232   add_chunk_to_chunk_list(new_chunk);
 233 
 234   return true;
 235 }
 236 
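// Pops one chunk of EntriesPerChunk entries from the global mark stack into
// ptr_arr and recycles the chunk onto the free list. Returns false if the
// stack is empty.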
 237 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 238   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 239 
 240   if (cur == NULL) {
 241     return false;
 242   }
 243 
 244   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 245 
 246   add_chunk_to_free_list(cur);
 247   return true;
 248 }
 249 
 250 void G1CMMarkStack::set_empty() {
 251   _chunks_in_chunk_list = 0;
 252   _hwm = 0;
 253   _chunk_list = NULL;
 254   _free_list = NULL;
 255 }
 256 
 257 G1CMRootRegions::G1CMRootRegions() :
 258   _survivors(NULL), _cm(NULL), _scan_in_progress(false),
 259   _should_abort(false), _claimed_survivor_index(0) { }
 260 
 261 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 262   _survivors = survivors;
 263   _cm = cm;
 264 }
 265 
 266 void G1CMRootRegions::prepare_for_scan() {
 267   assert(!scan_in_progress(), "pre-condition");
 268 
 269   // Currently, only survivors can be root regions.
 270   _claimed_survivor_index = 0;
 271   _scan_in_progress = _survivors->regions()->is_nonempty();
 272   _should_abort = false;
 273 }
 274 
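// Claims the next root region to scan, or returns NULL if there are none left
// or the scan should be aborted. May be called by multiple workers at the same
// time; claiming is done by atomically incrementing _claimed_survivor_index.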
 275 HeapRegion* G1CMRootRegions::claim_next() {
 276   if (_should_abort) {
 277     // If someone has set the should_abort flag, we return NULL to
 278     // force the caller to bail out of their loop.
 279     return NULL;
 280   }
 281 
 282   // Currently, only survivors can be root regions.
 283   const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
 284 
 285   int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
 286   if (claimed_index < survivor_regions->length()) {
 287     return survivor_regions->at(claimed_index);
 288   }
 289   return NULL;
 290 }
 291 
 292 uint G1CMRootRegions::num_root_regions() const {
 293   return (uint)_survivors->regions()->length();
 294 }
 295 
 296 void G1CMRootRegions::notify_scan_done() {
 297   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 298   _scan_in_progress = false;
 299   RootRegionScan_lock->notify_all();
 300 }
 301 
 302 void G1CMRootRegions::cancel_scan() {
 303   notify_scan_done();
 304 }
 305 
 306 void G1CMRootRegions::scan_finished() {
 307   assert(scan_in_progress(), "pre-condition");
 308 
 309   // Currently, only survivors can be root regions.
 310   if (!_should_abort) {
 311     assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
 312     assert((uint)_claimed_survivor_index >= _survivors->length(),
 313            "we should have claimed all survivors, claimed index = %u, length = %u",
 314            (uint)_claimed_survivor_index, _survivors->length());
 315   }
 316 
 317   notify_scan_done();
 318 }
 319 
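// Waits until the root region scan has completed. Returns true if the caller
// had to wait for an in-progress scan, false if no scan was in progress.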
 320 bool G1CMRootRegions::wait_until_scan_finished() {
 321   if (!scan_in_progress()) {
 322     return false;
 323   }
 324 
 325   {
 326     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 327     while (scan_in_progress()) {
 328       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 329     }
 330   }
 331   return true;
 332 }
 333 
 334 // Returns the maximum number of workers to be used in a concurrent
 335 // phase based on the number of GC workers being used in a STW
 336 // phase.
 337 static uint scale_concurrent_worker_threads(uint num_gc_workers) {
 338   return MAX2((num_gc_workers + 2) / 4, 1U);
 339 }
 340 
 341 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 342                                    G1RegionToSpaceMapper* prev_bitmap_storage,
 343                                    G1RegionToSpaceMapper* next_bitmap_storage) :
 344   // _cm_thread set inside the constructor
 345   _g1h(g1h),
 346   _completed_initialization(false),
 347 
 348   _mark_bitmap_1(),
 349   _mark_bitmap_2(),
 350   _prev_mark_bitmap(&_mark_bitmap_1),
 351   _next_mark_bitmap(&_mark_bitmap_2),
 352 
 353   _heap(_g1h->reserved_region()),
 354 
 355   _root_regions(),
 356 
 357   _global_mark_stack(),
 358 
 359   // _finger set in set_non_marking_state
 360 
 361   _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
 362   _max_num_tasks(ParallelGCThreads),
 363   // _num_active_tasks set in set_non_marking_state()
 364   // _tasks set inside the constructor
 365 
 366   _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
 367   _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),
 368 
 369   _first_overflow_barrier_sync(),
 370   _second_overflow_barrier_sync(),
 371 
 372   _has_overflown(false),
 373   _concurrent(false),
 374   _has_aborted(false),
 375   _restart_for_overflow(false),
 376   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 377   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 378 
 379   // _verbose_level set below
 380 
 381   _init_times(),
 382   _remark_times(),
 383   _remark_mark_times(),
 384   _remark_weak_ref_times(),
 385   _cleanup_times(),
 386   _total_cleanup_time(0.0),
 387 
 388   _accum_task_vtime(NULL),
 389 
 390   _concurrent_workers(NULL),
 391   _num_concurrent_workers(0),
 392   _max_concurrent_workers(0),
 393 
 394   _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
 395   _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
 396 {
 397   _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 398   _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);
 399 
 400   // Create & start ConcurrentMark thread.
 401   _cm_thread = new G1ConcurrentMarkThread(this);
 402   if (_cm_thread->osthread() == NULL) {
 403     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 404   }
 405 
 406   assert(CGC_lock != NULL, "CGC_lock must be initialized");
 407 
 408   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 409   satb_qs.set_buffer_size(G1SATBBufferSize);
 410 
 411   _root_regions.init(_g1h->survivor(), this);
 412 
 413   if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
 414     // Calculate the number of concurrent worker threads by scaling
 415     // the number of parallel GC threads.
 416     uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
 417     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 418   }
 419 
  assert(ConcGCThreads > 0, "ConcGCThreads should have been set.");
 421   if (ConcGCThreads > ParallelGCThreads) {
 422     log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
 423                     ConcGCThreads, ParallelGCThreads);
 424     return;
 425   }
 426 
 427   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 428   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 429 
 430   _num_concurrent_workers = ConcGCThreads;
 431   _max_concurrent_workers = _num_concurrent_workers;
 432 
 433   _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
 434   _concurrent_workers->initialize_workers();
 435 
 436   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 437     size_t mark_stack_size =
 438       MIN2(MarkStackSizeMax,
 439           MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
 440     // Verify that the calculated value for MarkStackSize is in range.
 441     // It would be nice to use the private utility routine from Arguments.
 442     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 443       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 444                       "must be between 1 and " SIZE_FORMAT,
 445                       mark_stack_size, MarkStackSizeMax);
 446       return;
 447     }
 448     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 449   } else {
 450     // Verify MarkStackSize is in range.
 451     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 452       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 453         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 454           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 455                           "must be between 1 and " SIZE_FORMAT,
 456                           MarkStackSize, MarkStackSizeMax);
 457           return;
 458         }
 459       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 460         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 461           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 462                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 463                           MarkStackSize, MarkStackSizeMax);
 464           return;
 465         }
 466       }
 467     }
 468   }
 469 
 470   if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
 471     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 472   }
 473 
 474   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 475   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);
 476 
 477   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 478   _num_active_tasks = _max_num_tasks;
 479 
 480   for (uint i = 0; i < _max_num_tasks; ++i) {
 481     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 482     task_queue->initialize();
 483     _task_queues->register_queue(i, task_queue);
 484 
 485     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());
 486 
 487     _accum_task_vtime[i] = 0.0;
 488   }
 489 
 490   reset_at_marking_complete();
 491   _completed_initialization = true;
 492 }
 493 
 494 void G1ConcurrentMark::reset() {
 495   _has_aborted = false;
 496 
 497   reset_marking_for_restart();
 498 
  // Reset all tasks, since different phases will use a different number of active
  // threads. So, it's easiest to have all of them ready.
 501   for (uint i = 0; i < _max_num_tasks; ++i) {
 502     _tasks[i]->reset(_next_mark_bitmap);
 503   }
 504 
 505   uint max_regions = _g1h->max_regions();
 506   for (uint i = 0; i < max_regions; i++) {
 507     _top_at_rebuild_starts[i] = NULL;
 508     _region_mark_stats[i].clear();
 509   }
 510 }
 511 
 512 void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
 513   for (uint j = 0; j < _max_num_tasks; ++j) {
 514     _tasks[j]->clear_mark_stats_cache(region_idx);
 515   }
 516   _top_at_rebuild_starts[region_idx] = NULL;
 517   _region_mark_stats[region_idx].clear();
 518 }
 519 
 520 void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
 521   uint const region_idx = r->hrm_index();
 522   if (r->is_humongous()) {
    assert(r->is_starts_humongous(), "Got a continues humongous region here");
 524     uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
 525     for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
 526       clear_statistics_in_region(j);
 527     }
 528   } else {
 529     clear_statistics_in_region(region_idx);
 530   }
 531 }
 532 
 533 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
 534   assert_at_safepoint_on_vm_thread();
 535 
 536   // Need to clear mark bit of the humongous object.
 537   if (_next_mark_bitmap->is_marked(r->bottom())) {
 538     _next_mark_bitmap->clear(r->bottom());
 539   }
 540 
 541   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
 542     return;
 543   }
 544 
 545   // Clear any statistics about the region gathered so far.
 546   clear_statistics(r);
 547 }
 548 
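// Resets the global marking state so that marking can be (re-)started, e.g.
// after the global mark stack has overflown: empties the mark stack and the
// task queues and moves the global finger back to the bottom of the heap.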
 549 void G1ConcurrentMark::reset_marking_for_restart() {
 550   _global_mark_stack.set_empty();
 551 
 552   // Expand the marking stack, if we have to and if we can.
 553   if (has_overflown()) {
 554     _global_mark_stack.expand();
 555 
 556     uint max_regions = _g1h->max_regions();
 557     for (uint i = 0; i < max_regions; i++) {
 558       _region_mark_stats[i].clear_during_overflow();
 559     }
 560   }
 561 
 562   clear_has_overflown();
 563   _finger = _heap.start();
 564 
 565   for (uint i = 0; i < _max_num_tasks; ++i) {
 566     G1CMTaskQueue* queue = _task_queues->queue(i);
 567     queue->set_empty();
 568   }
 569 }
 570 
 571 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 572   assert(active_tasks <= _max_num_tasks, "we should not have more");
 573 
 574   _num_active_tasks = active_tasks;
 575   // Need to update the three data structures below according to the
 576   // number of active threads for this phase.
 577   _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
 578   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 579   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 580 }
 581 
 582 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 583   set_concurrency(active_tasks);
 584 
 585   _concurrent = concurrent;
 586 
 587   if (!concurrent) {
    // At this point we should be in a STW phase, and have completed marking.
 589     assert_at_safepoint_on_vm_thread();
 590     assert(out_of_regions(),
 591            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 592            p2i(_finger), p2i(_heap.end()));
 593   }
 594 }
 595 
 596 void G1ConcurrentMark::reset_at_marking_complete() {
 597   // We set the global marking state to some default values when we're
 598   // not doing marking.
 599   reset_marking_for_restart();
 600   _num_active_tasks = 0;
 601 }
 602 
 603 G1ConcurrentMark::~G1ConcurrentMark() {
 604   FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
 605   FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
 606   // The G1ConcurrentMark instance is never freed.
 607   ShouldNotReachHere();
 608 }
 609 
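// Gang task that clears a mark bitmap, processing each region in chunks of
// chunk_size() bytes and optionally yielding to safepoint requests in between.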
 610 class G1ClearBitMapTask : public AbstractGangTask {
 611 public:
 612   static size_t chunk_size() { return M; }
 613 
 614 private:
 615   // Heap region closure used for clearing the given mark bitmap.
 616   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 617   private:
 618     G1CMBitMap* _bitmap;
 619     G1ConcurrentMark* _cm;
 620   public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
    }
 623 
 624     virtual bool do_heap_region(HeapRegion* r) {
 625       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 626 
 627       HeapWord* cur = r->bottom();
 628       HeapWord* const end = r->end();
 629 
 630       while (cur < end) {
 631         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 632         _bitmap->clear_range(mr);
 633 
 634         cur += chunk_size_in_words;
 635 
 636         // Abort iteration if after yielding the marking has been aborted.
 637         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 638           return true;
 639         }
 640         // Repeat the asserts from before the start of the closure. We will do them
 641         // as asserts here to minimize their overhead on the product. However, we
 642         // will have them as guarantees at the beginning / end of the bitmap
 643         // clearing to get some checking in the product.
 644         assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
 645         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 646       }
 647       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 648 
 649       return false;
 650     }
 651   };
 652 
 653   G1ClearBitmapHRClosure _cl;
 654   HeapRegionClaimer _hr_claimer;
 655   bool _suspendible; // If the task is suspendible, workers must join the STS.
 656 
 657 public:
 658   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 659     AbstractGangTask("G1 Clear Bitmap"),
 660     _cl(bitmap, suspendible ? cm : NULL),
 661     _hr_claimer(n_workers),
 662     _suspendible(suspendible)
 663   { }
 664 
 665   void work(uint worker_id) {
 666     SuspendibleThreadSetJoiner sts_join(_suspendible);
 667     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 668   }
 669 
 670   bool is_complete() {
 671     return _cl.is_complete();
 672   }
 673 };
 674 
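// Clears the given bitmap using the given work gang. If may_yield is true the
// workers join the suspendible thread set and the clearing may be interrupted
// by a yield or abort; otherwise it must run inside a safepoint and completes
// fully.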
 675 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 676   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 677 
 678   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 679   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 680 
 681   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 682 
 683   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 684 
 685   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 686   workers->run_task(&cl, num_workers);
 687   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 688 }
 689 
 690 void G1ConcurrentMark::cleanup_for_next_mark() {
 691   // Make sure that the concurrent mark thread looks to still be in
 692   // the current cycle.
 693   guarantee(cm_thread()->during_cycle(), "invariant");
 694 
 695   // We are finishing up the current cycle by clearing the next
 696   // marking bitmap and getting it ready for the next cycle. During
 697   // this time no other cycle can start. So, let's make sure that this
 698   // is the case.
 699   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 700 
 701   clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);
 702 
 703   // Repeat the asserts from above.
 704   guarantee(cm_thread()->during_cycle(), "invariant");
 705   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 706 }
 707 
 708 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 709   assert_at_safepoint_on_vm_thread();
 710   clear_bitmap(_prev_mark_bitmap, workers, false);
 711 }
 712 
 713 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 714   G1CMBitMap* _bitmap;
 715  public:
 716   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 717   }
 718 
 719   virtual bool do_heap_region(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
    // that the result of the get_next_marked_addr() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
 724     HeapWord* end = r->end();
 725     return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
 726   }
 727 };
 728 
 729 bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
 730   CheckBitmapClearHRClosure cl(_next_mark_bitmap);
 731   _g1h->heap_region_iterate(&cl);
 732   return cl.is_complete();
 733 }
 734 
 735 class NoteStartOfMarkHRClosure : public HeapRegionClosure {
 736 public:
 737   bool do_heap_region(HeapRegion* r) {
 738     r->note_start_of_marking();
 739     return false;
 740   }
 741 };
 742 
 743 void G1ConcurrentMark::pre_initial_mark() {
 744   // Initialize marking structures. This has to be done in a STW phase.
 745   reset();
 746 
 747   // For each region note start of marking.
 748   NoteStartOfMarkHRClosure startcl;
 749   _g1h->heap_region_iterate(&startcl);
 750 }
 751 
 752 
 753 void G1ConcurrentMark::post_initial_mark() {
 754   // Start Concurrent Marking weak-reference discovery.
 755   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 756   // enable ("weak") refs discovery
 757   rp->enable_discovery();
 758   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 759 
 760   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 763   satb_mq_set.set_active_all_threads(true, /* new active value */
 764                                      false /* expected_active */);
 765 
 766   _root_regions.prepare_for_scan();
 767 
 768   // update_g1_committed() will be called at the end of an evac pause
 769   // when marking is on. So, it's also called at the end of the
 770   // initial-mark pause to update the heap end, if the heap expands
 771   // during it. No need to call it here.
 772 }
 773 
 774 /*
 775  * Notice that in the next two methods, we actually leave the STS
 776  * during the barrier sync and join it immediately afterwards. If we
 777  * do not do this, the following deadlock can occur: one thread could
 778  * be in the barrier sync code, waiting for the other thread to also
 779  * sync up, whereas another one could be trying to yield, while also
 780  * waiting for the other threads to sync up too.
 781  *
 782  * Note, however, that this code is also used during remark and in
 783  * this case we should not attempt to leave / enter the STS, otherwise
 784  * we'll either hit an assert (debug / fastdebug) or deadlock
 785  * (product). So we should only leave / enter the STS if we are
 786  * operating concurrently.
 787  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 793  */
 794 
 795 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 796   bool barrier_aborted;
 797   {
 798     SuspendibleThreadSetLeaver sts_leave(concurrent());
 799     barrier_aborted = !_first_overflow_barrier_sync.enter();
 800   }
 801 
 802   // at this point everyone should have synced up and not be doing any
 803   // more work
 804 
 805   if (barrier_aborted) {
 806     // If the barrier aborted we ignore the overflow condition and
 807     // just abort the whole marking phase as quickly as possible.
 808     return;
 809   }
 810 }
 811 
 812 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 813   SuspendibleThreadSetLeaver sts_leave(concurrent());
 814   _second_overflow_barrier_sync.enter();
 815 
 816   // at this point everything should be re-initialized and ready to go
 817 }
 818 
 819 class G1CMConcurrentMarkingTask : public AbstractGangTask {
 820   G1ConcurrentMark*     _cm;
 821 
 822 public:
 823   void work(uint worker_id) {
 824     assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
 825     ResourceMark rm;
 826 
 827     double start_vtime = os::elapsedVTime();
 828 
 829     {
 830       SuspendibleThreadSetJoiner sts_join;
 831 
 832       assert(worker_id < _cm->active_tasks(), "invariant");
 833 
 834       G1CMTask* task = _cm->task(worker_id);
 835       task->record_start_time();
 836       if (!_cm->has_aborted()) {
 837         do {
 838           task->do_marking_step(G1ConcMarkStepDurationMillis,
 839                                 true  /* do_termination */,
 840                                 false /* is_serial*/);
 841 
 842           _cm->do_yield_check();
 843         } while (!_cm->has_aborted() && task->has_aborted());
 844       }
 845       task->record_end_time();
 846       guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
 847     }
 848 
 849     double end_vtime = os::elapsedVTime();
 850     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 851   }
 852 
 853   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
 854       AbstractGangTask("Concurrent Mark"), _cm(cm) { }
 855 
 856   ~G1CMConcurrentMarkingTask() { }
 857 };
 858 
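// Calculates the number of concurrent marking workers to activate: either the
// full _max_concurrent_workers or, with UseDynamicNumberOfGCThreads, a value
// adapted to the current number of non-daemon Java threads.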
 859 uint G1ConcurrentMark::calc_active_marking_workers() {
 860   uint result = 0;
 861   if (!UseDynamicNumberOfGCThreads ||
 862       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 863        !ForceDynamicNumberOfGCThreads)) {
 864     result = _max_concurrent_workers;
 865   } else {
 866     result =
 867       AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
 868                                                       1, /* Minimum workers */
 869                                                       _num_concurrent_workers,
 870                                                       Threads::number_of_non_daemon_threads());
    // Don't scale the result down by scale_concurrent_worker_threads() because
    // that scaling has already gone into "_max_concurrent_workers".
 873   }
 874   assert(result > 0 && result <= _max_concurrent_workers,
 875          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
 876          _max_concurrent_workers, result);
 877   return result;
 878 }
 879 
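// Scans a single root (survivor) region from bottom() to top(), iterating over
// all objects and applying a G1RootRegionScanClosure to their reference fields.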
 880 void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) {
 881   // Currently, only survivors can be root regions.
 882   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 883   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 884 
 885   const uintx interval = PrefetchScanIntervalInBytes;
 886   HeapWord* curr = hr->bottom();
 887   const HeapWord* end = hr->top();
 888   while (curr < end) {
 889     Prefetch::read(curr, interval);
 890     oop obj = oop(curr);
 891     int size = obj->oop_iterate_size(&cl);
 892     assert(size == obj->size(), "sanity");
 893     curr += size;
 894   }
 895 }
 896 
 897 class G1CMRootRegionScanTask : public AbstractGangTask {
 898   G1ConcurrentMark* _cm;
 899 public:
 900   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 901     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
 902 
 903   void work(uint worker_id) {
 904     assert(Thread::current()->is_ConcurrentGC_thread(),
 905            "this should only be done by a conc GC thread");
 906 
 907     G1CMRootRegions* root_regions = _cm->root_regions();
 908     HeapRegion* hr = root_regions->claim_next();
 909     while (hr != NULL) {
 910       _cm->scan_root_region(hr, worker_id);
 911       hr = root_regions->claim_next();
 912     }
 913   }
 914 };
 915 
 916 void G1ConcurrentMark::scan_root_regions() {
 917   // scan_in_progress() will have been set to true only if there was
 918   // at least one root region to scan. So, if it's false, we
 919   // should not attempt to do any further work.
 920   if (root_regions()->scan_in_progress()) {
 921     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 922 
 923     _num_concurrent_workers = MIN2(calc_active_marking_workers(),
 924                                    // We distribute work on a per-region basis, so starting
 925                                    // more threads than that is useless.
 926                                    root_regions()->num_root_regions());
 927     assert(_num_concurrent_workers <= _max_concurrent_workers,
 928            "Maximum number of marking threads exceeded");
 929 
 930     G1CMRootRegionScanTask task(this);
 931     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
 932                         task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
 933     _concurrent_workers->run_task(&task, _num_concurrent_workers);
 934 
 935     // It's possible that has_aborted() is true here without actually
 936     // aborting the survivor scan earlier. This is OK as it's
 937     // mainly used for sanity checking.
 938     root_regions()->scan_finished();
 939   }
 940 }
 941 
 942 void G1ConcurrentMark::concurrent_cycle_start() {
 943   _gc_timer_cm->register_gc_start();
 944 
 945   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
 946 
 947   _g1h->trace_heap_before_gc(_gc_tracer_cm);
 948 }
 949 
 950 void G1ConcurrentMark::concurrent_cycle_end() {
 951   _g1h->collector_state()->set_clearing_next_bitmap(false);
 952 
 953   _g1h->trace_heap_after_gc(_gc_tracer_cm);
 954 
 955   if (has_aborted()) {
 956     log_info(gc, marking)("Concurrent Mark Abort");
 957     _gc_tracer_cm->report_concurrent_mode_failure();
 958   }
 959 
 960   _gc_timer_cm->register_gc_end();
 961 
 962   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
 963 }
 964 
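// Runs the concurrent phase of marking: determines the number of marking
// workers to use and executes G1CMConcurrentMarkingTask on the concurrent
// worker gang.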
 965 void G1ConcurrentMark::mark_from_roots() {
 966   _restart_for_overflow = false;
 967 
 968   _num_concurrent_workers = calc_active_marking_workers();
 969 
 970   uint active_workers = MAX2(1U, _num_concurrent_workers);
 971 
 972   // Setting active workers is not guaranteed since fewer
 973   // worker threads may currently exist and more may not be
 974   // available.
 975   active_workers = _concurrent_workers->update_active_workers(active_workers);
 976   log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());
 977 
 978   // Parallel task terminator is set in "set_concurrency_and_phase()"
 979   set_concurrency_and_phase(active_workers, true /* concurrent */);
 980 
 981   G1CMConcurrentMarkingTask marking_task(this);
 982   _concurrent_workers->run_task(&marking_task);
 983   print_stats();
 984 }
 985 
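// Helper for heap verification at the start and end of the Remark and Cleanup
// pauses. The full heap verification is only done when VerifyDuringGC is
// enabled.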
 986 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
 987   G1HeapVerifier* verifier = _g1h->verifier();
 988 
 989   verifier->verify_region_sets_optional();
 990 
 991   if (VerifyDuringGC) {
 992     GCTraceTime(Debug, gc, phases) trace(caller, _gc_timer_cm);
 993 
 994     size_t const BufLen = 512;
 995     char buffer[BufLen];
 996 
 997     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
 998     verifier->verify(type, vo, buffer);
 999   }
1000 
1001   verifier->check_bitmaps(caller);
1002 }
1003 
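// Closure that uses the liveness information gathered during marking to decide,
// per region, whether its remembered set needs to be rebuilt, and records the
// top-at-rebuild-start value for the region.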
1004 class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
1005   G1CollectedHeap* _g1h;
1006   G1ConcurrentMark* _cm;
1007 
1008   uint _num_regions_selected_for_rebuild;  // The number of regions actually selected for rebuild.
1009 
1010   void update_remset_before_rebuild(HeapRegion * hr) {
1011     G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker();
1012 
1013     size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize;
1014     bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
1015     if (selected_for_rebuild) {
1016       _num_regions_selected_for_rebuild++;
1017     }
1018     _cm->update_top_at_rebuild_start(hr);
1019   }
1020 
1021 public:
1022   G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm) :
1023     _g1h(g1h), _cm(cm), _num_regions_selected_for_rebuild(0) { }
1024 
1025   virtual bool do_heap_region(HeapRegion* r) {
1026     update_remset_before_rebuild(r);
1027     return false;
1028   }
1029 
1030   uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
1031 };
1032 
1033 class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
1034   G1CollectedHeap* _g1h;
1035 public:
1036   G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1037 
1038   virtual bool do_heap_region(HeapRegion* r) {
1039     _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r);
1040     return false;
1041   }
1042 };
1043 
1044 void G1ConcurrentMark::remark() {
1045   assert_at_safepoint_on_vm_thread();
1046 
  // If a full collection has happened, we should not continue. However, we might
  // have ended up here because the Remark VM operation had already been scheduled.
1049   if (has_aborted()) {
1050     return;
1051   }
1052 
1053   G1Policy* g1p = _g1h->g1_policy();
1054   g1p->record_concurrent_mark_remark_start();
1055 
1056   double start = os::elapsedTime();
1057 
1058   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");
1059 
1060   {
1061     GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1062     finalize_marking();
1063   }
1064 
1065   double mark_work_end = os::elapsedTime();
1066 
1067   bool const mark_finished = !has_overflown();
1068   if (mark_finished) {
1069     weak_refs_work(false /* clear_all_soft_refs */);
1070 
1071     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1075     satb_mq_set.set_active_all_threads(false, /* new active value */
1076                                        true /* expected_active */);
1077 
1078     {
      GCTraceTime(Debug, gc, phases) trace("Flush Task Caches");
1080       flush_all_task_caches();
1081     }
1082 
1083     {
      GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking Before Rebuild");
1085       G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this);
1086       _g1h->heap_region_iterate(&cl);
1087       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1088                                       _g1h->num_regions(), cl.num_selected_for_rebuild());
1089     }
1090 
1091     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UseNextMarking, "Remark after");
1092 
1093     assert(!restart_for_overflow(), "sanity");
1094     // Completely reset the marking state since marking completed
1095     reset_at_marking_complete();
1096   } else {
1097     // We overflowed.  Restart concurrent marking.
1098     _restart_for_overflow = true;
1099 
1100     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");
1101 
1102     // Clear the marking state because we will be restarting
1103     // marking due to overflowing the global mark stack.
1104     reset_marking_for_restart();
1105   }
1106 
1107   {
    GCTraceTime(Debug, gc, phases) trace("Report Object Count");
1109     report_object_count();
1110   }
1111 
1112   // Statistics
1113   double now = os::elapsedTime();
1114   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1115   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1116   _remark_times.add((now - start) * 1000.0);
1117 
1118   g1p->record_concurrent_mark_remark_end();
1119 }
1120 
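// Gang task for the Cleanup pause: frees regions that have no live data after
// marking, updates the old/humongous region sets and collects the freed
// regions into the given cleanup list.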
1121 class G1CleanupTask : public AbstractGangTask {
1122   // Per-region work during the Cleanup pause.
1123   class G1CleanupRegionsClosure : public HeapRegionClosure {
1124     G1CollectedHeap* _g1h;
1125     size_t _freed_bytes;
1126     FreeRegionList* _local_cleanup_list;
1127     uint _old_regions_removed;
1128     uint _humongous_regions_removed;
1129     HRRSCleanupTask* _hrrs_cleanup_task;
1130 
1131   public:
1132     G1CleanupRegionsClosure(G1CollectedHeap* g1,
1133                             FreeRegionList* local_cleanup_list,
1134                             HRRSCleanupTask* hrrs_cleanup_task) :
1135       _g1h(g1),
1136       _freed_bytes(0),
1137       _local_cleanup_list(local_cleanup_list),
1138       _old_regions_removed(0),
1139       _humongous_regions_removed(0),
1140       _hrrs_cleanup_task(hrrs_cleanup_task) { }
1141 
1142     size_t freed_bytes() { return _freed_bytes; }
    uint old_regions_removed() const { return _old_regions_removed; }
    uint humongous_regions_removed() const { return _humongous_regions_removed; }
1145 
1146     bool do_heap_region(HeapRegion *hr) {
1147       hr->note_end_of_marking();
1148 
1149       if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
1150         _freed_bytes += hr->used();
1151         hr->set_containing_set(NULL);
1152         if (hr->is_humongous()) {
1153           _humongous_regions_removed++;
1154           _g1h->free_humongous_region(hr, _local_cleanup_list);
1155         } else {
1156           _old_regions_removed++;
1157           _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */);
1158         }
1159         hr->clear_cardtable();
1160         _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
1161         log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
1162       } else {
1163         hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1164       }
1165 
1166       return false;
1167     }
1168   };
1169 
1170   G1CollectedHeap* _g1h;
1171   FreeRegionList* _cleanup_list;
1172   HeapRegionClaimer _hrclaimer;
1173 
1174 public:
1175   G1CleanupTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1176     AbstractGangTask("G1 Cleanup"),
1177     _g1h(g1h),
1178     _cleanup_list(cleanup_list),
1179     _hrclaimer(n_workers) {
1180 
1181     HeapRegionRemSet::reset_for_cleanup_tasks();
1182   }
1183 
1184   void work(uint worker_id) {
1185     FreeRegionList local_cleanup_list("Local Cleanup List");
1186     HRRSCleanupTask hrrs_cleanup_task;
1187     G1CleanupRegionsClosure cl(_g1h,
1188                                &local_cleanup_list,
1189                                &hrrs_cleanup_task);
1190     _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
1191     assert(cl.is_complete(), "Shouldn't have aborted!");
1192 
1193     // Now update the old/humongous region sets
1194     _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
1195     {
1196       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1197       _g1h->decrement_summary_bytes(cl.freed_bytes());
1198 
1199       _cleanup_list->add_ordered(&local_cleanup_list);
1200       assert(local_cleanup_list.is_empty(), "post-condition");
1201 
1202       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1203     }
1204   }
1205 };
1206 
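// Frees all regions found to be completely empty after marking and returns
// them to the free list, printing them via the region printer if it is active.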
1207 void G1ConcurrentMark::reclaim_empty_regions() {
1208   WorkGang* workers = _g1h->workers();
1209   FreeRegionList empty_regions_list("Empty Regions After Mark List");
1210 
1211   G1CleanupTask cl(_g1h, &empty_regions_list, workers->active_workers());
1212   workers->run_task(&cl);
1213 
1214   if (!empty_regions_list.is_empty()) {
1215     log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
1216     // Now print the empty regions list.
1217     G1HRPrinter* hrp = _g1h->hr_printer();
1218     if (hrp->is_active()) {
1219       FreeRegionListIterator iter(&empty_regions_list);
1220       while (iter.more_available()) {
1221         HeapRegion* hr = iter.get_next();
1222         hrp->cleanup(hr);
1223       }
1224     }
1225     // And actually make them available.
1226     _g1h->prepend_to_freelist(&empty_regions_list);
1227   }
1228 }
1229 
1230 void G1ConcurrentMark::cleanup() {
1231   assert_at_safepoint_on_vm_thread();
1232 
1233   // If a full collection has happened, we shouldn't do this.
1234   if (has_aborted()) {
1235     return;
1236   }
1237 
1238   G1Policy* g1p = _g1h->g1_policy();
1239   g1p->record_concurrent_mark_cleanup_start();
1240 
1241   double start = os::elapsedTime();
1242 
1243   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UseNextMarking, "Cleanup before");
1244 
1245   {
    GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking After Rebuild");
1247     G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
1248     _g1h->heap_region_iterate(&cl);
1249   }
1250 
1251   if (log_is_enabled(Trace, gc, liveness)) {
1252     G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
1253     _g1h->heap_region_iterate(&cl);
1254   }
1255 
1256   // Install newly created mark bitmap as "prev".
1257   swap_mark_bitmaps();
1258   {
    GCTraceTime(Debug, gc, phases) trace("Reclaim Empty Regions");
1260     reclaim_empty_regions();
1261   }
1262 
1263   // Cleanup will have freed any regions completely full of garbage.
1264   // Update the soft reference policy with the new heap occupancy.
1265   Universe::update_heap_info_at_gc();
1266 
1267   // Clean out dead classes and update Metaspace sizes.
1268   if (ClassUnloadingWithConcurrentMark) {
    GCTraceTime(Debug, gc, phases) trace("Purge Metaspace");
1270     ClassLoaderDataGraph::purge();
1271   }
1272   MetaspaceGC::compute_new_size();
1273 
1274   // We reclaimed old regions so we should calculate the sizes to make
1275   // sure we update the old gen/space data.
1276   _g1h->g1mm()->update_sizes();
1277 
1278   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");
1279 
  // We need to make this a "collection" so that any collection pause that
  // races with it goes around and waits for Cleanup to finish.
1282   _g1h->increment_total_collections();
1283 
1284   // Local statistics
1285   double recent_cleanup_time = (os::elapsedTime() - start);
1286   _total_cleanup_time += recent_cleanup_time;
1287   _cleanup_times.add(recent_cleanup_time);
1288 
1289   {
    GCTraceTime(Debug, gc, phases) trace("Finalize Concurrent Mark Cleanup");
1291     _g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1292   }
1293 }
1294 
1295 // Supporting Object and Oop closures for reference discovery
// and processing during marking
1297 
1298 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1299   HeapWord* addr = (HeapWord*)obj;
1300   return addr != NULL &&
1301          (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj));
1302 }
1303 
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1305 // Uses the G1CMTask associated with a worker thread (for serial reference
1306 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1307 // trace referent objects.
1308 //
1309 // Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also, using the tasks' local queues removes the potential
// for the workers to interfere with each other, which could occur if
// they operated on the global stack.
1315 
1316 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1317   G1ConcurrentMark* _cm;
1318   G1CMTask*         _task;
1319   int               _ref_counter_limit;
1320   int               _ref_counter;
1321   bool              _is_serial;
1322 public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task),
    _ref_counter_limit(G1RefProcDrainInterval),
    _ref_counter(G1RefProcDrainInterval),
    _is_serial(is_serial) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1329   }
1330 
1331   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1332   virtual void do_oop(      oop* p) { do_oop_work(p); }
1333 
1334   template <class T> void do_oop_work(T* p) {
1335     if (!_cm->has_overflown()) {
1336       _task->deal_with_reference(p);
1337       _ref_counter--;
1338 
1339       if (_ref_counter == 0) {
1340         // We have dealt with _ref_counter_limit references, pushing them
1341         // and objects reachable from them on to the local stack (and
1342         // possibly the global stack). Call G1CMTask::do_marking_step() to
1343         // process these entries.
1344         //
1345         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1346         // there's nothing more to do (i.e. we're done with the entries that
1347         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1348         // above) or we overflow.
1349         //
1350         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1351         // flag while there may still be some work to do. (See the comment at
1352         // the beginning of G1CMTask::do_marking_step() for those conditions -
1353         // one of which is reaching the specified time target.) It is only
1354         // when G1CMTask::do_marking_step() returns without setting the
1355         // has_aborted() flag that the marking step has completed.
1356         do {
1357           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1358           _task->do_marking_step(mark_step_duration_ms,
1359                                  false      /* do_termination */,
1360                                  _is_serial);
1361         } while (_task->has_aborted() && !_cm->has_overflown());
1362         _ref_counter = _ref_counter_limit;
1363       }
1364     }
1365   }
1366 };
1367 
1368 // 'Drain' oop closure used by both serial and parallel reference processing.
1369 // Uses the G1CMTask associated with a given worker thread (for serial
1370 // reference processing the G1CMtask for worker 0 is used). Calls the
1371 // do_marking_step routine, with an unbelievably large timeout value,
1372 // to drain the marking data structures of the remaining entries
1373 // added by the 'keep alive' oop closure above.
1374 
1375 class G1CMDrainMarkingStackClosure : public VoidClosure {
1376   G1ConcurrentMark* _cm;
1377   G1CMTask*         _task;
1378   bool              _is_serial;
1379  public:
1380   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1381     _cm(cm), _task(task), _is_serial(is_serial) {
1382     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1383   }
1384 
1385   void do_void() {
1386     do {
1387       // We call G1CMTask::do_marking_step() to completely drain the local
1388       // and global marking stacks of entries pushed by the 'keep alive'
1389       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1390       //
1391       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1392       // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
1394       // closure to the entries on the discovered ref lists) or we overflow
1395       // the global marking stack.
1396       //
1397       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1398       // flag while there may still be some work to do. (See the comment at
1399       // the beginning of G1CMTask::do_marking_step() for those conditions -
1400       // one of which is reaching the specified time target.) It is only
1401       // when G1CMTask::do_marking_step() returns without setting the
1402       // has_aborted() flag that the marking step has completed.
1403 
1404       _task->do_marking_step(1000000000.0 /* something very large */,
1405                              true         /* do_termination */,
1406                              _is_serial);
1407     } while (_task->has_aborted() && !_cm->has_overflown());
1408   }
1409 };
1410 
1411 // Implementation of AbstractRefProcTaskExecutor for parallel
1412 // reference processing at the end of G1 concurrent marking
1413 
1414 class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
1415 private:
1416   G1CollectedHeap*  _g1h;
1417   G1ConcurrentMark* _cm;
1418   WorkGang*         _workers;
1419   uint              _active_workers;
1420 
1421 public:
1422   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1423                           G1ConcurrentMark* cm,
1424                           WorkGang* workers,
1425                           uint n_workers) :
1426     _g1h(g1h), _cm(cm),
1427     _workers(workers), _active_workers(n_workers) { }
1428 
1429   // Executes the given task using concurrent marking worker threads.
1430   virtual void execute(ProcessTask& task);
1431   virtual void execute(EnqueueTask& task);
1432 };
1433 
1434 class G1CMRefProcTaskProxy : public AbstractGangTask {
1435   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1436   ProcessTask&      _proc_task;
1437   G1CollectedHeap*  _g1h;
1438   G1ConcurrentMark* _cm;
1439 
1440 public:
1441   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1442                        G1CollectedHeap* g1h,
1443                        G1ConcurrentMark* cm) :
1444     AbstractGangTask("Process reference objects in parallel"),
1445     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1446     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1447     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1448   }
1449 
1450   virtual void work(uint worker_id) {
1451     ResourceMark rm;
1452     HandleMark hm;
1453     G1CMTask* task = _cm->task(worker_id);
1454     G1CMIsAliveClosure g1_is_alive(_g1h);
1455     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1456     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1457 
1458     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1459   }
1460 };
1461 
1462 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1463   assert(_workers != NULL, "Need parallel worker threads.");
1464   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1465 
1466   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1467 
1468   // We need to reset the concurrency level before each
1469   // proxy task execution, so that the termination protocol
1470   // and overflow handling in G1CMTask::do_marking_step() know
1471   // how many workers to wait for.
1472   _cm->set_concurrency(_active_workers);
1473   _workers->run_task(&proc_task_proxy);
1474 }
1475 
1476 class G1CMRefEnqueueTaskProxy : public AbstractGangTask {
1477   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1478   EnqueueTask& _enq_task;
1479 
1480 public:
1481   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1482     AbstractGangTask("Enqueue reference objects in parallel"),
1483     _enq_task(enq_task) { }
1484 
1485   virtual void work(uint worker_id) {
1486     _enq_task.work(worker_id);
1487   }
1488 };
1489 
1490 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1491   assert(_workers != NULL, "Need parallel worker threads.");
1492   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1493 
1494   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1495 
1496   // Not strictly necessary but...
1497   //
1498   // We need to reset the concurrency level before each
1499   // proxy task execution, so that the termination protocol
1500   // and overflow handling in G1CMTask::do_marking_step() know
1501   // how many workers to wait for.
1502   _cm->set_concurrency(_active_workers);
1503   _workers->run_task(&enq_task_proxy);
1504 }
1505 
1506 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
1507   ResourceMark rm;
1508   HandleMark   hm;
1509 
1510   // Is alive closure.
1511   G1CMIsAliveClosure g1_is_alive(_g1h);
1512 
1513   // Inner scope to exclude the cleaning of the string and symbol
1514   // tables from the displayed time.
1515   {
1516     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1517 
1518     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1519 
1520     // See the comment in G1CollectedHeap::ref_processing_init()
1521     // about how reference processing currently works in G1.
1522 
1523     // Set the soft reference policy
1524     rp->setup_policy(clear_all_soft_refs);
1525     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1526 
1527     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1528     // in serial reference processing. Note these closures are also
1529     // used for serially processing (by the current thread) the
1530     // JNI references during parallel reference processing.
1531     //
1532     // These closures do not need to synchronize with the worker
1533     // threads involved in parallel reference processing as these
1534     // instances are executed serially by the current thread (i.e.
1535     // reference processing is not multi-threaded and is thus
1536     // performed by the current thread instead of a gang worker).
1537     //
1538     // The gang tasks involved in parallel reference processing create
1539     // their own instances of these closures, which do their own
1540     // synchronization among themselves.
1541     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1542     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1543 
1544     // We need at least one active thread. If reference processing
1545     // is not multi-threaded we use the current (VMThread) thread,
1546     // otherwise we use the work gang from the G1CollectedHeap and
1547     // we utilize all the worker threads we can.
1548     bool processing_is_mt = rp->processing_is_mt();
1549     uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
1550     active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
1551 
1552     // Parallel processing task executor.
1553     G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
1554                                               _g1h->workers(), active_workers);
1555     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1556 
1557     // Set the concurrency level. The phase was already set prior to
1558     // executing the remark task.
1559     set_concurrency(active_workers);
1560 
1561     // Set the degree of MT processing here.  If the discovery was done MT,
1562     // the number of threads involved during discovery could differ from
1563     // the number of active workers.  This is OK as long as the discovered
1564     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1565     rp->set_active_mt_degree(active_workers);
1566 
1567     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1568 
1569     // Process the weak references.
1570     const ReferenceProcessorStats& stats =
1571         rp->process_discovered_references(&g1_is_alive,
1572                                           &g1_keep_alive,
1573                                           &g1_drain_mark_stack,
1574                                           executor,
1575                                           &pt);
1576     _gc_tracer_cm->report_gc_reference_stats(stats);
1577     pt.print_all_references();
1578 
1579     // The do_oop work routines of the keep_alive and drain_marking_stack
1580     // oop closures will set the has_overflown flag if we overflow the
1581     // global marking stack.
1582 
1583     assert(has_overflown() || _global_mark_stack.is_empty(),
1584            "Mark stack should be empty (unless it has overflown)");
1585 
1586     assert(rp->num_q() == active_workers, "Reference processor queue count should match the number of active workers");
1587 
1588     rp->enqueue_discovered_references(executor, &pt);
1589 
1590     rp->verify_no_references_recorded();
1591 
1592     pt.print_enqueue_phase();
1593 
1594     assert(!rp->discovery_enabled(), "Post condition");
1595   }
1596 
1597   assert(has_overflown() || _global_mark_stack.is_empty(),
1598          "Mark stack should be empty (unless it has overflown)");
1599 
1600   {
1601     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1602     WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
1603   }
1604 
1605   if (has_overflown()) {
1606     // We cannot trust g1_is_alive if the marking stack overflowed.
1607     return;
1608   }
1609 
1610   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1611 
1612   // Unload Klasses, String, Symbols, Code Cache, etc.
1613   if (ClassUnloadingWithConcurrentMark) {
1614     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1615     bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1616     _g1h->complete_cleaning(&g1_is_alive, purged_classes);
1617   } else {
1618     GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1619     // No need to clean string table and symbol table as they are treated as strong roots when
1620     // class unloading is disabled.
1621     _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1622   }
1623 }
1624 
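     // Report object count statistics after marking to the GC tracer, using the
     // concurrent marking is-alive closure to filter out dead objects.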
1625 void G1ConcurrentMark::report_object_count() {
1626   G1CMIsAliveClosure is_alive(_g1h);
1627   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1628 }
1629 
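     // Swap the prev and next mark bitmaps so that the just-completed marking becomes
     // the prev (complete) bitmap, and flag the collector state so that the (now stale)
     // next bitmap is cleared before the next marking cycle.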
1630 void G1ConcurrentMark::swap_mark_bitmaps() {
1631   G1CMBitMap* temp = _prev_mark_bitmap;
1632   _prev_mark_bitmap = _next_mark_bitmap;
1633   _next_mark_bitmap = temp;
1634   _g1h->collector_state()->set_clearing_next_bitmap(true);
1635 }
1636 
1637 // Closure for marking entries in SATB buffers.
1638 class G1CMSATBBufferClosure : public SATBBufferClosure {
1639 private:
1640   G1CMTask* _task;
1641   G1CollectedHeap* _g1h;
1642 
1643   // This is very similar to G1CMTask::deal_with_reference, but with
1644   // more relaxed requirements for the argument, so this must be more
1645   // circumspect about treating the argument as an object.
1646   void do_entry(void* entry) const {
1647     _task->increment_refs_reached();
1648     oop const obj = static_cast<oop>(entry);
1649     _task->make_reference_grey(obj);
1650   }
1651 
1652 public:
1653   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1654     : _task(task), _g1h(g1h) { }
1655 
1656   virtual void do_buffer(void** buffer, size_t size) {
1657     for (size_t i = 0; i < size; ++i) {
1658       do_entry(buffer[i]);
1659     }
1660   }
1661 };
1662 
1663 class G1RemarkThreadsClosure : public ThreadClosure {
1664   G1CMSATBBufferClosure _cm_satb_cl;
1665   G1CMOopClosure _cm_cl;
1666   MarkingCodeBlobClosure _code_cl;
1667   int _thread_parity;
1668 
1669  public:
1670   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1671     _cm_satb_cl(task, g1h),
1672     _cm_cl(g1h, task),
1673     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1674     _thread_parity(Threads::thread_claim_parity()) {}
1675 
1676   void do_thread(Thread* thread) {
1677     if (thread->is_Java_thread()) {
1678       if (thread->claim_oops_do(true, _thread_parity)) {
1679         JavaThread* jt = (JavaThread*)thread;
1680 
1681         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking.
1682         // However, oops reachable from nmethods have very complex lifecycles:
1683         // * Alive if on the stack of an executing method
1684         // * Weakly reachable otherwise
1685         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1686         // kept live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1687         jt->nmethods_do(&_code_cl);
1688 
1689         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1690       }
1691     } else if (thread->is_VM_thread()) {
1692       if (thread->claim_oops_do(true, _thread_parity)) {
1693         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1694       }
1695     }
1696   }
1697 };
1698 
1699 class G1CMRemarkTask : public AbstractGangTask {
1700   G1ConcurrentMark* _cm;
1701 public:
1702   void work(uint worker_id) {
1703     G1CMTask* task = _cm->task(worker_id);
1704     task->record_start_time();
1705     {
1706       ResourceMark rm;
1707       HandleMark hm;
1708 
1709       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1710       Threads::threads_do(&threads_f);
1711     }
1712 
1713     do {
1714       task->do_marking_step(1000000000.0 /* something very large */,
1715                             true         /* do_termination       */,
1716                             false        /* is_serial            */);
1717     } while (task->has_aborted() && !_cm->has_overflown());
1718     // If we overflow, then we do not want to restart. We instead
1719     // want to abort remark and do concurrent marking again.
1720     task->record_end_time();
1721   }
1722 
1723   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1724     AbstractGangTask("Par Remark"), _cm(cm) {
1725     _cm->terminator()->reset_for_reuse(active_workers);
1726   }
1727 };
1728 
1729 void G1ConcurrentMark::finalize_marking() {
1730   ResourceMark rm;
1731   HandleMark   hm;
1732 
1733   _g1h->ensure_parsability(false);
1734 
1735   // this is remark, so we'll use up all active threads
1736   uint active_workers = _g1h->workers()->active_workers();
1737   set_concurrency_and_phase(active_workers, false /* concurrent */);
1738   // Leave _parallel_marking_threads at its
1739   // value originally calculated in the G1ConcurrentMark
1740   // constructor and pass the number of active workers
1741   // through the gang in the task.
1742 
1743   {
1744     StrongRootsScope srs(active_workers);
1745 
1746     G1CMRemarkTask remarkTask(this, active_workers);
1747     // We will start all available threads, even if we decide that the
1748     // active_workers will be fewer. The extra ones will just bail out
1749     // immediately.
1750     _g1h->workers()->run_task(&remarkTask);
1751   }
1752 
1753   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1754   guarantee(has_overflown() ||
1755             satb_mq_set.completed_buffers_num() == 0,
1756             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1757             BOOL_TO_STR(has_overflown()),
1758             satb_mq_set.completed_buffers_num());
1759 
1760   print_stats();
1761 }
1762 
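     // Flush each task's region mark statistics cache and log the combined cache
     // hit/miss ratio.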
1763 void G1ConcurrentMark::flush_all_task_caches() {
1764   size_t hits = 0;
1765   size_t misses = 0;
1766   for (uint i = 0; i < _max_num_tasks; i++) {
1767     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1768     hits += stats.first;
1769     misses += stats.second;
1770   }
1771   size_t sum = hits + misses;
1772   log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
1773                        hits, misses, percent_of(hits, sum));
1774 }
1775 
1776 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1777   _prev_mark_bitmap->clear_range(mr);
1778 }
1779 
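     // Claim the next region to scan by CAS-ing the global finger past it. Returns NULL
     // either when the claimed region turns out to be empty (the caller should simply
     // retry) or when the finger has reached the end of the heap.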
1780 HeapRegion*
1781 G1ConcurrentMark::claim_region(uint worker_id) {
1782   // "checkpoint" the finger
1783   HeapWord* finger = _finger;
1784 
1785   while (finger < _heap.end()) {
1786     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1787 
1788     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1789     // Make sure that the reads below do not float before loading curr_region.
1790     OrderAccess::loadload();
1791     // heap_region_containing above may return NULL as we always scan and claim
1792     // up to the end of the heap. In this case, just jump to the next region.
1793     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1794 
1795     // Is the gap between reading the finger and doing the CAS too long?
1796     HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
1797     if (res == finger && curr_region != NULL) {
1798       // we succeeded
1799       HeapWord*   bottom        = curr_region->bottom();
1800       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1801 
1802       // Notice that _finger == end cannot be guaranteed here since
1803       // someone else might have moved the finger even further.
1804       assert(_finger >= end, "the finger should have moved forward");
1805 
1806       if (limit > bottom) {
1807         return curr_region;
1808       } else {
1809         assert(limit == bottom,
1810                "the region limit should be at bottom");
1811         // we return NULL and the caller should try calling
1812         // claim_region() again.
1813         return NULL;
1814       }
1815     } else {
1816       assert(_finger > finger, "the finger should have moved forward");
1817       // read it again
1818       finger = _finger;
1819     }
1820   }
1821 
1822   return NULL;
1823 }
1824 
1825 #ifndef PRODUCT
1826 class VerifyNoCSetOops {
1827   G1CollectedHeap* _g1h;
1828   const char* _phase;
1829   int _info;
1830 
1831 public:
1832   VerifyNoCSetOops(const char* phase, int info = -1) :
1833     _g1h(G1CollectedHeap::heap()),
1834     _phase(phase),
1835     _info(info)
1836   { }
1837 
1838   void operator()(G1TaskQueueEntry task_entry) const {
1839     if (task_entry.is_array_slice()) {
1840       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1841       return;
1842     }
1843     guarantee(oopDesc::is_oop(task_entry.obj()),
1844               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1845               p2i(task_entry.obj()), _phase, _info);
1846     guarantee(!_g1h->is_in_cset(task_entry.obj()),
1847               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1848               p2i(task_entry.obj()), _phase, _info);
1849   }
1850 };
1851 
1852 void G1ConcurrentMark::verify_no_cset_oops() {
1853   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1854   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
1855     return;
1856   }
1857 
1858   // Verify entries on the global mark stack
1859   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1860 
1861   // Verify entries on the task queues
1862   for (uint i = 0; i < _max_num_tasks; ++i) {
1863     G1CMTaskQueue* queue = _task_queues->queue(i);
1864     queue->iterate(VerifyNoCSetOops("Queue", i));
1865   }
1866 
1867   // Verify the global finger
1868   HeapWord* global_finger = finger();
1869   if (global_finger != NULL && global_finger < _heap.end()) {
1870     // Since we always iterate over all regions, we might get a NULL HeapRegion
1871     // here.
1872     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1873     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1874               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1875               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1876   }
1877 
1878   // Verify the task fingers
1879   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1880   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1881     G1CMTask* task = _tasks[i];
1882     HeapWord* task_finger = task->finger();
1883     if (task_finger != NULL && task_finger < _heap.end()) {
1884       // See above note on the global finger verification.
1885       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
1886       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
1887                 !task_hr->in_collection_set(),
1888                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1889                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
1890     }
1891   }
1892 }
1893 #endif // PRODUCT
1894 
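     // Rebuild remembered sets concurrently using the concurrent marking worker gang.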
1895 void G1ConcurrentMark::rebuild_rem_set_concurrently() {
1896   _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset);
1897 }
1898 
1899 void G1ConcurrentMark::print_stats() {
1900   if (!log_is_enabled(Debug, gc, stats)) {
1901     return;
1902   }
1903   log_debug(gc, stats)("---------------------------------------------------------------------");
1904   for (size_t i = 0; i < _num_active_tasks; ++i) {
1905     _tasks[i]->print_stats();
1906     log_debug(gc, stats)("---------------------------------------------------------------------");
1907   }
1908 }
1909 
1910 void G1ConcurrentMark::concurrent_cycle_abort() {
1911   if (!cm_thread()->during_cycle() || _has_aborted) {
1912     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
1913     return;
1914   }
1915 
1916   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
1917   // concurrent bitmap clearing.
1918   {
1919     GCTraceTime(Debug, gc) debug("Clear Next Bitmap");
1920     clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
1921   }
1922   // Note we cannot clear the previous marking bitmap here
1923   // since VerifyDuringGC verifies the objects marked during
1924   // a full GC against the previous bitmap.
1925 
1926   // Empty mark stack
1927   reset_marking_for_restart();
1928   for (uint i = 0; i < _max_num_tasks; ++i) {
1929     _tasks[i]->clear_region_fields();
1930   }
1931   _first_overflow_barrier_sync.abort();
1932   _second_overflow_barrier_sync.abort();
1933   _has_aborted = true;
1934 
1935   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1936   satb_mq_set.abandon_partial_marking();
1937   // This can be called either during or outside marking; we'll read
1938   // the expected_active value from the SATB queue set.
1939   satb_mq_set.set_active_all_threads(
1940                                  false, /* new active value */
1941                                  satb_mq_set.is_active() /* expected_active */);
1942 }
1943 
1944 static void print_ms_time_info(const char* prefix, const char* name,
1945                                NumberSeq& ns) {
1946   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
1947                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
1948   if (ns.num() > 0) {
1949     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
1950                            prefix, ns.sd(), ns.maximum());
1951   }
1952 }
1953 
1954 void G1ConcurrentMark::print_summary_info() {
1955   Log(gc, marking) log;
1956   if (!log.is_trace()) {
1957     return;
1958   }
1959 
1960   log.trace(" Concurrent marking:");
1961   print_ms_time_info("  ", "init marks", _init_times);
1962   print_ms_time_info("  ", "remarks", _remark_times);
1963   {
1964     print_ms_time_info("     ", "final marks", _remark_mark_times);
1965     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
1966 
1967   }
1968   print_ms_time_info("  ", "cleanups", _cleanup_times);
1969   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
1970             _total_cleanup_time, (_cleanup_times.num() > 0 ? _total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
1971   log.trace("  Total stop_world time = %8.2f s.",
1972             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
1973   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
1974             cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
1975 }
1976 
1977 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
1978   _concurrent_workers->print_worker_threads_on(st);
1979 }
1980 
1981 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
1982   _concurrent_workers->threads_do(tc);
1983 }
1984 
1985 void G1ConcurrentMark::print_on_error(outputStream* st) const {
1986   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
1987                p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
1988   _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
1989   _next_mark_bitmap->print_on_error(st, " Next Bits: ");
1990 }
1991 
1992 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
1993   ReferenceProcessor* result = g1h->ref_processor_cm();
1994   assert(result != NULL, "CM reference processor should not be NULL");
1995   return result;
1996 }
1997 
1998 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
1999                                G1CMTask* task)
2000   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2001     _g1h(g1h), _task(task)
2002 { }
2003 
2004 void G1CMTask::setup_for_region(HeapRegion* hr) {
2005   assert(hr != NULL,
2006         "claim_region() should have filtered out NULL regions");
2007   _curr_region  = hr;
2008   _finger       = hr->bottom();
2009   update_region_limit();
2010 }
2011 
2012 void G1CMTask::update_region_limit() {
2013   HeapRegion* hr            = _curr_region;
2014   HeapWord* bottom          = hr->bottom();
2015   HeapWord* limit           = hr->next_top_at_mark_start();
2016 
2017   if (limit == bottom) {
2018     // The region was collected underneath our feet.
2019     // We set the finger to bottom to ensure that the bitmap
2020     // iteration that will follow this will not do anything.
2021     // (This is not a condition that holds when we set the region up,
2022     // as the region is not supposed to be empty in the first place.)
2023     _finger = bottom;
2024   } else if (limit >= _region_limit) {
2025     assert(limit >= _finger, "peace of mind");
2026   } else {
2027     assert(limit < _region_limit, "only way to get here");
2028     // This can happen under some pretty unusual circumstances.  An
2029     // evacuation pause empties the region underneath our feet (NTAMS
2030     // at bottom). We then do some allocation in the region (NTAMS
2031     // stays at bottom), followed by the region being used as a GC
2032     // alloc region (NTAMS will move to top() and the objects
2033     // originally below it will be grayed). All objects now marked in
2034     // the region are explicitly grayed, if below the global finger,
2035     // and in fact we do not need to scan anything else. So, we simply
2036     // set _finger to be limit to ensure that the bitmap iteration
2037     // doesn't do anything.
2038     _finger = limit;
2039   }
2040 
2041   _region_limit = limit;
2042 }
2043 
2044 void G1CMTask::giveup_current_region() {
2045   assert(_curr_region != NULL, "invariant");
2046   clear_region_fields();
2047 }
2048 
2049 void G1CMTask::clear_region_fields() {
2050   // Values for these three fields that indicate that we're not
2051   // holding on to a region.
2052   _curr_region   = NULL;
2053   _finger        = NULL;
2054   _region_limit  = NULL;
2055 }
2056 
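     // Install or clear the oop closure used by this task. Exactly one of the current
     // and the new closure must be NULL.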
2057 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2058   if (cm_oop_closure == NULL) {
2059     assert(_cm_oop_closure != NULL, "invariant");
2060   } else {
2061     assert(_cm_oop_closure == NULL, "invariant");
2062   }
2063   _cm_oop_closure = cm_oop_closure;
2064 }
2065 
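     // Re-initialize this task for a new marking cycle using the given next mark bitmap.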
2066 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2067   guarantee(next_mark_bitmap != NULL, "invariant");
2068   _next_mark_bitmap              = next_mark_bitmap;
2069   clear_region_fields();
2070 
2071   _calls                         = 0;
2072   _elapsed_time_ms               = 0.0;
2073   _termination_time_ms           = 0.0;
2074   _termination_start_time_ms     = 0.0;
2075 
2076   _mark_stats_cache.reset();
2077 }
2078 
2079 bool G1CMTask::should_exit_termination() {
2080   regular_clock_call();
2081   // This is called when we are in the termination protocol. We should
2082   // quit if, for some reason, this task wants to abort or the global
2083   // stack is not empty (this means that we can get work from it).
2084   return !_cm->mark_stack_empty() || has_aborted();
2085 }
2086 
2087 void G1CMTask::reached_limit() {
2088   assert(_words_scanned >= _words_scanned_limit ||
2089          _refs_reached >= _refs_reached_limit ,
2090          "shouldn't have been called otherwise");
2091   regular_clock_call();
2092 }
2093 
2094 void G1CMTask::regular_clock_call() {
2095   if (has_aborted()) {
2096     return;
2097   }
2098 
2099   // First, we need to recalculate the words scanned and refs reached
2100   // limits for the next clock call.
2101   recalculate_limits();
2102 
2103   // During the regular clock call we do the following
2104 
2105   // (1) If an overflow has been flagged, then we abort.
2106   if (_cm->has_overflown()) {
2107     set_has_aborted();
2108     return;
2109   }
2110 
2111   // If we are not concurrent (i.e. we're doing remark) we don't need
2112   // to check anything else. The other steps are only needed during
2113   // the concurrent marking phase.
2114   if (!_cm->concurrent()) {
2115     return;
2116   }
2117 
2118   // (2) If marking has been aborted for Full GC, then we also abort.
2119   if (_cm->has_aborted()) {
2120     set_has_aborted();
2121     return;
2122   }
2123 
2124   double curr_time_ms = os::elapsedVTime() * 1000.0;
2125 
2126   // (3) We check whether we should yield. If we have to, then we abort.
2127   if (SuspendibleThreadSet::should_yield()) {
2128     // We should yield. To do this we abort the task. The caller is
2129     // responsible for yielding.
2130     set_has_aborted();
2131     return;
2132   }
2133 
2134   // (4) We check whether we've reached our time quota. If we have,
2135   // then we abort.
2136   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2137   if (elapsed_time_ms > _time_target_ms) {
2138     set_has_aborted();
2139     _has_timed_out = true;
2140     return;
2141   }
2142 
2143   // (5) Finally, we check whether there are enough completed SATB
2144   // buffers available for processing. If there are, we abort.
2145   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2146   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2147     // We do need to process SATB buffers, so we'll abort and restart
2148     // the marking task to do so.
2149     set_has_aborted();
2150     return;
2151   }
2152 }
2153 
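     // Compute the next words-scanned and refs-reached limits at which the regular
     // clock will be invoked.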
2154 void G1CMTask::recalculate_limits() {
2155   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2156   _words_scanned_limit      = _real_words_scanned_limit;
2157 
2158   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2159   _refs_reached_limit       = _real_refs_reached_limit;
2160 }
2161 
2162 void G1CMTask::decrease_limits() {
2163   // This is called when we believe that we're going to do an infrequent
2164   // operation which will increase the per byte scanned cost (i.e. move
2165   // entries to/from the global stack). It basically tries to decrease the
2166   // scanning limit so that the clock is called earlier.
2167 
2168   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2169   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2170 }
2171 
2172 void G1CMTask::move_entries_to_global_stack() {
2173   // Local array where we'll store the entries that will be popped
2174   // from the local queue.
2175   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2176 
2177   size_t n = 0;
2178   G1TaskQueueEntry task_entry;
2179   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2180     buffer[n] = task_entry;
2181     ++n;
2182   }
2183   if (n < G1CMMarkStack::EntriesPerChunk) {
2184     buffer[n] = G1TaskQueueEntry();
2185   }
2186 
2187   if (n > 0) {
2188     if (!_cm->mark_stack_push(buffer)) {
2189       set_has_aborted();
2190     }
2191   }
2192 
2193   // This operation was quite expensive, so decrease the limits.
2194   decrease_limits();
2195 }
2196 
2197 bool G1CMTask::get_entries_from_global_stack() {
2198   // Local array where we'll store the entries that will be popped
2199   // from the global stack.
2200   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2201 
2202   if (!_cm->mark_stack_pop(buffer)) {
2203     return false;
2204   }
2205 
2206   // We did actually pop at least one entry.
2207   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2208     G1TaskQueueEntry task_entry = buffer[i];
2209     if (task_entry.is_null()) {
2210       break;
2211     }
2212     assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2213     bool success = _task_queue->push(task_entry);
2214     // We only call this when the local queue is empty or under a
2215     // given target limit. So, we do not expect this push to fail.
2216     assert(success, "invariant");
2217   }
2218 
2219   // This operation was quite expensive, so decrease the limits
2220   decrease_limits();
2221   return true;
2222 }
2223 
2224 void G1CMTask::drain_local_queue(bool partially) {
2225   if (has_aborted()) {
2226     return;
2227   }
2228 
2229   // Decide what the target size is, depending on whether we're going to
2230   // drain it partially (so that other tasks can steal if they run out
2231   // of things to do) or totally (at the very end).
2232   size_t target_size;
2233   if (partially) {
2234     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2235   } else {
2236     target_size = 0;
2237   }
2238 
2239   if (_task_queue->size() > target_size) {
2240     G1TaskQueueEntry entry;
2241     bool ret = _task_queue->pop_local(entry);
2242     while (ret) {
2243       scan_task_entry(entry);
2244       if (_task_queue->size() <= target_size || has_aborted()) {
2245         ret = false;
2246       } else {
2247         ret = _task_queue->pop_local(entry);
2248       }
2249     }
2250   }
2251 }
2252 
2253 void G1CMTask::drain_global_stack(bool partially) {
2254   if (has_aborted()) {
2255     return;
2256   }
2257 
2258   // We have a policy to drain the local queue before we attempt to
2259   // drain the global stack.
2260   assert(partially || _task_queue->size() == 0, "invariant");
2261 
2262   // Decide what the target size is, depending on whether we're going to
2263   // drain it partially (so that other tasks can steal if they run out
2264   // of things to do) or totally (at the very end).
2265   // Notice that when draining the global mark stack partially, due to the raciness
2266   // of the mark stack size update we might in fact drop below the target. But
2267   // this is not a problem.
2268   // In case of total draining, we simply process until the global mark stack is
2269   // totally empty, disregarding the size counter.
2270   if (partially) {
2271     size_t const target_size = _cm->partial_mark_stack_size_target();
2272     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2273       if (get_entries_from_global_stack()) {
2274         drain_local_queue(partially);
2275       }
2276     }
2277   } else {
2278     while (!has_aborted() && get_entries_from_global_stack()) {
2279       drain_local_queue(partially);
2280     }
2281   }
2282 }
2283 
2284 // The SATB queue code makes several assumptions about whether to call the par or
2285 // non-par versions of the methods. This is why some of the code is
2286 // replicated. We should really get rid of the single-threaded version
2287 // of the code to simplify things.
2288 void G1CMTask::drain_satb_buffers() {
2289   if (has_aborted()) {
2290     return;
2291   }
2292 
2293   // We set this so that the regular clock knows that we're in the
2294   // middle of draining buffers and doesn't set the abort flag when it
2295   // notices that SATB buffers are available for draining. It'd be
2296   // very counterproductive if it did that. :-)
2297   _draining_satb_buffers = true;
2298 
2299   G1CMSATBBufferClosure satb_cl(this, _g1h);
2300   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2301 
2302   // This keeps claiming and applying the closure to completed buffers
2303   // until we run out of buffers or we need to abort.
2304   while (!has_aborted() &&
2305          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2306     regular_clock_call();
2307   }
2308 
2309   _draining_satb_buffers = false;
2310 
2311   assert(has_aborted() ||
2312          _cm->concurrent() ||
2313          satb_mq_set.completed_buffers_num() == 0, "invariant");
2314 
2315   // Again, this was a potentially expensive operation; decrease the
2316   // limits to get the regular clock call early.
2317   decrease_limits();
2318 }
2319 
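     // Reset the cached mark statistics entry for the given region.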
2320 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2321   _mark_stats_cache.reset(region_idx);
2322 }
2323 
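     // Evict all entries from this task's mark statistics cache, returning the cache
     // hit and miss counts it accumulated.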
2324 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2325   return _mark_stats_cache.evict_all();
2326 }
2327 
2328 void G1CMTask::print_stats() {
2329   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2330   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2331                        _elapsed_time_ms, _termination_time_ms);
2332   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2333                        _step_times_ms.num(),
2334                        _step_times_ms.avg(),
2335                        _step_times_ms.sd(),
2336                        _step_times_ms.maximum(),
2337                        _step_times_ms.sum());
2338   size_t const hits = _mark_stats_cache.hits();
2339   size_t const misses = _mark_stats_cache.misses();
2340   log_debug(gc, stats)("  Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f",
2341                        hits, misses, percent_of(hits, hits + misses));
2342 }
2343 
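     // Try to steal a single task queue entry from another worker's queue; returns true
     // and sets task_entry on success.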
2344 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2345   return _task_queues->steal(worker_id, hash_seed, task_entry);
2346 }
2347 
2348 /*****************************************************************************
2349 
2350     The do_marking_step(time_target_ms, ...) method is the building
2351     block of the parallel marking framework. It can be called in parallel
2352     with other invocations of do_marking_step() on different tasks
2353     (but only one per task, obviously) and concurrently with the
2354     mutator threads, or during remark, hence it eliminates the need
2355     for two versions of the code. When called during remark, it will
2356     pick up from where the task left off during the concurrent marking
2357     phase. Interestingly, tasks are also claimable during evacuation
2358     pauses, since do_marking_step() ensures that it aborts before
2359     it needs to yield.
2360 
2361     The data structures that it uses to do marking work are the
2362     following:
2363 
2364       (1) Marking Bitmap. If there are gray objects that appear only
2365       on the bitmap (this happens either when dealing with an overflow
2366       or when the initial marking phase has simply marked the roots
2367       and didn't push them on the stack), then tasks claim heap
2368       regions whose bitmap they then scan to find gray objects. A
2369       global finger indicates where the end of the last claimed region
2370       is. A local finger indicates how far into the region a task has
2371       scanned. The two fingers are used to determine how to gray an
2372       object (i.e. whether simply marking it is OK, as it will be
2373       visited by a task in the future, or whether it also needs to be
2374       pushed on a stack).
2375 
2376       (2) Local Queue. The local queue of the task which is accessed
2377       reasonably efficiently by the task. Other tasks can steal from
2378       it when they run out of work. Throughout the marking phase, a
2379       task attempts to keep its local queue short but not totally
2380       empty, so that entries are available for stealing by other
2381       tasks. Only when there is no more work will a task totally
2382       drain its local queue.
2383 
2384       (3) Global Mark Stack. This handles local queue overflow. During
2385       marking only sets of entries are moved between it and the local
2386       queues, as access to it requires a mutex and more fine-grained
2387       interaction with it, which might cause contention. If it
2388       overflows, then the marking phase should restart and iterate
2389       over the bitmap to identify gray objects. Throughout the marking
2390       phase, tasks attempt to keep the global mark stack at a small
2391       length but not totally empty, so that entries are available for
2392       popping by other tasks. Only when there is no more work will tasks
2393       totally drain the global mark stack.
2394 
2395       (4) SATB Buffer Queue. This is where completed SATB buffers are
2396       made available. Buffers are regularly removed from this queue
2397       and scanned for roots, so that the queue doesn't get too
2398       long. During remark, all completed buffers are processed, as
2399       well as the filled in parts of any uncompleted buffers.
2400 
2401     The do_marking_step() method tries to abort when the time target
2402     has been reached. There are a few other cases when the
2403     do_marking_step() method also aborts:
2404 
2405       (1) When the marking phase has been aborted (after a Full GC).
2406 
2407       (2) When a global overflow (on the global stack) has been
2408       triggered. Before the task aborts, it will actually sync up with
2409       the other tasks to ensure that all the marking data structures
2410       (local queues, stacks, fingers etc.)  are re-initialized so that
2411       when do_marking_step() completes, the marking phase can
2412       immediately restart.
2413 
2414       (3) When enough completed SATB buffers are available. The
2415       do_marking_step() method only tries to drain SATB buffers right
2416       at the beginning. So, if enough buffers are available, the
2417       marking step aborts and the SATB buffers are processed at
2418       the beginning of the next invocation.
2419 
2420       (4) To yield. When we have to yield, we abort and yield
2421       right at the end of do_marking_step(). This saves us from a lot
2422       of hassle as, by yielding, we might allow a Full GC. If this
2423       happens then objects will be compacted underneath our feet, the
2424       heap might shrink, etc. We save checking for this by just
2425       aborting and doing the yield right at the end.
2426 
2427     From the above it follows that the do_marking_step() method should
2428     be called in a loop (or, otherwise, regularly) until it completes.
2429 
2430     If a marking step completes without its has_aborted() flag being
2431     true, it means it has completed the current marking phase (and
2432     also all other marking tasks have done so and have all synced up).
2433 
2434     A method called regular_clock_call() is invoked "regularly" (in
2435     sub-millisecond intervals) throughout marking. It is this clock method that
2436     checks all the abort conditions which were mentioned above and
2437     decides when the task should abort. A work-based scheme is used to
2438     trigger this clock method: when the number of object words the
2439     marking phase has scanned or the number of references the marking
2440     phase has visited reach a given limit. Additional invocations of
2441     the clock method have been planted in a few other strategic places
2442     too. The initial reason for the clock method was to avoid calling
2443     vtime too regularly, as it is quite expensive. So, once it was in
2444     place, it was natural to piggy-back all the other conditions on it
2445     too and not constantly check them throughout the code.
2446 
2447     If do_termination is true then do_marking_step will enter its
2448     termination protocol.
2449 
2450     The value of is_serial must be true when do_marking_step is being
2451     called serially (i.e. by the VMThread) and do_marking_step should
2452     skip any synchronization in the termination and overflow code.
2453     Examples include the serial remark code and the serial reference
2454     processing closures.
2455 
2456     The value of is_serial must be false when do_marking_step is
2457     being called by any of the worker threads in a work gang.
2458     Examples include the concurrent marking code (CMMarkingTask),
2459     the MT remark code, and the MT reference processing closures.
2460 
2461  *****************************************************************************/
2462 
2463 void G1CMTask::do_marking_step(double time_target_ms,
2464                                bool do_termination,
2465                                bool is_serial) {
2466   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2467 
2468   _start_time_ms = os::elapsedVTime() * 1000.0;
2469 
2470   // If do_stealing is true then do_marking_step will attempt to
2471   // steal work from the other G1CMTasks. It only makes sense to
2472   // enable stealing when the termination protocol is enabled
2473   // and do_marking_step() is not being called serially.
2474   bool do_stealing = do_termination && !is_serial;
2475 
2476   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2477   _time_target_ms = time_target_ms - diff_prediction_ms;
2478 
2479   // set up the variables that are used in the work-based scheme to
2480   // call the regular clock method
2481   _words_scanned = 0;
2482   _refs_reached  = 0;
2483   recalculate_limits();
2484 
2485   // clear all flags
2486   clear_has_aborted();
2487   _has_timed_out = false;
2488   _draining_satb_buffers = false;
2489 
2490   ++_calls;
2491 
2492   // Set up the bitmap and oop closures. Anything that uses them is
2493   // eventually called from this method, so it is OK to allocate these
2494   // locally, on this method's stack.
2495   G1CMBitMapClosure bitmap_closure(this, _cm);
2496   G1CMOopClosure cm_oop_closure(_g1h, this);
2497   set_cm_oop_closure(&cm_oop_closure);
2498 
2499   if (_cm->has_overflown()) {
2500     // This can happen if the mark stack overflows during a GC pause
2501     // and this task, after a yield point, restarts. We have to abort
2502     // as we need to get into the overflow protocol which happens
2503     // right at the end of this task.
2504     set_has_aborted();
2505   }
2506 
2507   // First drain any available SATB buffers. After this, we will not
2508   // look at SATB buffers before the next invocation of this method.
2509   // If enough completed SATB buffers are queued up, the regular clock
2510   // will abort this task so that it restarts.
2511   drain_satb_buffers();
2512   // ...then partially drain the local queue and the global stack
2513   drain_local_queue(true);
2514   drain_global_stack(true);
2515 
2516   do {
2517     if (!has_aborted() && _curr_region != NULL) {
2518       // This means that we're already holding on to a region.
2519       assert(_finger != NULL, "if region is not NULL, then the finger "
2520              "should not be NULL either");
2521 
2522       // We might have restarted this task after an evacuation pause
2523       // which might have evacuated the region we're holding on to
2524       // underneath our feet. Let's read its limit again to make sure
2525       // that we do not iterate over a region of the heap that
2526       // contains garbage (update_region_limit() will also move
2527       // _finger to the start of the region if it is found empty).
2528       update_region_limit();
2529       // We will start from _finger not from the start of the region,
2530       // as we might be restarting this task after aborting half-way
2531       // through scanning this region. In this case, _finger points to
2532       // the address where we last found a marked object. If this is a
2533       // fresh region, _finger points to start().
2534       MemRegion mr = MemRegion(_finger, _region_limit);
2535 
2536       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2537              "humongous regions should go around loop once only");
2538 
2539       // Some special cases:
2540       // If the memory region is empty, we can just give up the region.
2541       // If the current region is humongous then we only need to check
2542       // the bitmap for the bit associated with the start of the object,
2543       // scan the object if it's live, and give up the region.
2544       // Otherwise, let's iterate over the bitmap of the part of the region
2545       // that is left.
2546       // If the iteration is successful, give up the region.
2547       if (mr.is_empty()) {
2548         giveup_current_region();
2549         regular_clock_call();
2550       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2551         if (_next_mark_bitmap->is_marked(mr.start())) {
2552           // The object is marked - apply the closure
2553           bitmap_closure.do_addr(mr.start());
2554         }
2555         // Even if this task aborted while scanning the humongous object
2556         // we can (and should) give up the current region.
2557         giveup_current_region();
2558         regular_clock_call();
2559       } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
2560         giveup_current_region();
2561         regular_clock_call();
2562       } else {
2563         assert(has_aborted(), "currently the only way to do so");
2564         // The only way to abort the bitmap iteration is to return
2565         // false from the do_addr() method. However, inside the
2566         // do_addr() method we move the _finger to point to the
2567         // object currently being looked at. So, if we bail out, we
2568         // have definitely set _finger to something non-null.
2569         assert(_finger != NULL, "invariant");
2570 
2571         // Region iteration was actually aborted. So now _finger
2572         // points to the address of the object we last scanned. If we
2573         // leave it there, when we restart this task, we will rescan
2574         // the object. It is easy to avoid this. We move the finger by
2575         // enough to point to the next possible object header.
2576         assert(_finger < _region_limit, "invariant");
2577         HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2578         // Check if bitmap iteration was aborted while scanning the last object
2579         if (new_finger >= _region_limit) {
2580           giveup_current_region();
2581         } else {
2582           move_finger_to(new_finger);
2583         }
2584       }
2585     }
2586     // At this point we have either completed iterating over the
2587     // region we were holding on to, or we have aborted.
2588 
2589     // We then partially drain the local queue and the global stack.
2590     // (Do we really need this?)
2591     drain_local_queue(true);
2592     drain_global_stack(true);
2593 
2594     // Read the note on the claim_region() method about why it might
2595     // return NULL with potentially more regions available for
2596     // claiming and why we have to check out_of_regions() to determine
2597     // whether we're done or not.
2598     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2599       // We are going to try to claim a new region. We should have
2600       // given up on the previous one.
2601       // Separated the asserts so that we know which one fires.
2602       assert(_curr_region  == NULL, "invariant");
2603       assert(_finger       == NULL, "invariant");
2604       assert(_region_limit == NULL, "invariant");
2605       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2606       if (claimed_region != NULL) {
2607         // Yes, we managed to claim one
2608         setup_for_region(claimed_region);
2609         assert(_curr_region == claimed_region, "invariant");
2610       }
2611       // It is important to call the regular clock here. It might take
2612       // a while to claim a region if, for example, we hit a large
2613       // block of empty regions. So we need to call the regular clock
2614       // method once round the loop to make sure it's called
2615       // frequently enough.
2616       regular_clock_call();
2617     }
2618 
2619     if (!has_aborted() && _curr_region == NULL) {
2620       assert(_cm->out_of_regions(),
2621              "at this point we should be out of regions");
2622     }
2623   } while ( _curr_region != NULL && !has_aborted());
2624 
2625   if (!has_aborted()) {
2626     // We cannot check whether the global stack is empty, since other
2627     // tasks might be pushing objects to it concurrently.
2628     assert(_cm->out_of_regions(),
2629            "at this point we should be out of regions");
2630     // Try to reduce the number of available SATB buffers so that
2631     // remark has less work to do.
2632     drain_satb_buffers();
2633   }
2634 
2635   // Since we've done everything else, we can now totally drain the
2636   // local queue and global stack.
2637   drain_local_queue(false);
2638   drain_global_stack(false);
2639 
2640   // Attempt at work stealing from other tasks' queues.
2641   if (do_stealing && !has_aborted()) {
2642     // We have not aborted. This means that we have finished all that
2643     // we could. Let's try to do some stealing...
2644 
2645     // We cannot check whether the global stack is empty, since other
2646     // tasks might be pushing objects to it concurrently.
2647     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2648            "only way to reach here");
2649     while (!has_aborted()) {
2650       G1TaskQueueEntry entry;
2651       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2652         scan_task_entry(entry);
2653 
2654         // And since we're towards the end, let's totally drain the
2655         // local queue and global stack.
2656         drain_local_queue(false);
2657         drain_global_stack(false);
2658       } else {
2659         break;
2660       }
2661     }
2662   }
2663 
2664   // We still haven't aborted. Now, let's try to get into the
2665   // termination protocol.
2666   if (do_termination && !has_aborted()) {
2667     // We cannot check whether the global stack is empty, since other
2668     // tasks might be concurrently pushing objects on it.
2669     // Separated the asserts so that we know which one fires.
2670     assert(_cm->out_of_regions(), "only way to reach here");
2671     assert(_task_queue->size() == 0, "only way to reach here");
2672     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2673 
2674     // The G1CMTask class also extends the TerminatorTerminator class,
2675     // hence its should_exit_termination() method will also decide
2676     // whether to exit the termination protocol or not.
2677     bool finished = (is_serial ||
2678                      _cm->terminator()->offer_termination(this));
2679     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2680     _termination_time_ms +=
2681       termination_end_time_ms - _termination_start_time_ms;
2682 
2683     if (finished) {
2684       // We're all done.
2685 
2686       // We can now guarantee that the global stack is empty, since
2687       // all other tasks have finished. We separated the guarantees so
2688       // that, if a condition is false, we can immediately find out
2689       // which one.
2690       guarantee(_cm->out_of_regions(), "only way to reach here");
2691       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2692       guarantee(_task_queue->size() == 0, "only way to reach here");
2693       guarantee(!_cm->has_overflown(), "only way to reach here");
2694     } else {
2695       // Apparently there's more work to do. Let's abort this task. The
2696       // caller will restart it and we can hopefully find more things to do.
2697       set_has_aborted();
2698     }
2699   }
2700 
2701   // Mainly for debugging purposes to make sure that a pointer to the
2702   // closure which was allocated locally in this frame doesn't
2703   // escape it by accident.
2704   set_cm_oop_closure(NULL);
2705   double end_time_ms = os::elapsedVTime() * 1000.0;
2706   double elapsed_time_ms = end_time_ms - _start_time_ms;
2707   // Update the step history.
2708   _step_times_ms.add(elapsed_time_ms);
2709 
2710   if (has_aborted()) {
2711     // The task was aborted for some reason.
2712     if (_has_timed_out) {
2713       double diff_ms = elapsed_time_ms - _time_target_ms;
2714       // Keep statistics of how well we did with respect to hitting
2715       // our target only if we actually timed out (if we aborted for
2716       // other reasons, then the results might get skewed).
2717       _marking_step_diffs_ms.add(diff_ms);
2718     }
2719 
2720     if (_cm->has_overflown()) {
2721       // This is the interesting one. We aborted because a global
2722       // overflow was raised. This means we have to restart the
2723       // marking phase and start iterating over regions. However, in
2724       // order to do this we have to make sure that all tasks stop
2725       // what they are doing and re-initialize in a safe manner. We
2726       // will achieve this with the use of two barrier sync points.
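           //
           // In outline, the restart protocol implemented by the code below is:
           //   1. every parallel task enters the first sync barrier, so once we
           //      pass it all marking work has stopped;
           //   2. each task clears its local state, and worker 0 additionally
           //      resets the global marking state if we are in the concurrent
           //      phase;
           //   3. every parallel task enters the second sync barrier, after
           //      which marking is ready to restart.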
2727 
2728       if (!is_serial) {
2729         // We only need to enter the sync barrier if being called
2730         // from a parallel context
2731         _cm->enter_first_sync_barrier(_worker_id);
2732 
2733         // When we exit this sync barrier we know that all tasks have
2734         // stopped doing marking work. So, it's now safe to
2735         // re-initialize our data structures.
2736       }
2737 
2738       clear_region_fields();
2739       flush_mark_stats_cache();
2740 
2741       if (!is_serial) {
2742         // If we're executing the concurrent phase of marking, reset the marking
2743         // state; otherwise the marking state is reset after reference processing,
2744         // during the remark pause.
2745         // If we reset here as a result of an overflow during the remark we will
2746         // see assertion failures from any subsequent set_concurrency_and_phase()
2747         // calls.
2748         if (_cm->concurrent() && _worker_id == 0) {
2749           // Worker 0 is responsible for clearing the global data structures because
2750           // of an overflow. During STW we should not clear the overflow flag (in
2751           // G1ConcurrentMark::reset_marking_for_restart()) since we rely on it being true
2752           // when we exit this method to abort the pause and restart concurrent marking.
2753           _cm->reset_marking_for_restart();
2754 
2755           log_info(gc, marking)("Concurrent Mark reset for overflow");
2756         }
2757 
2758         // ...and enter the second barrier.
2759         _cm->enter_second_sync_barrier(_worker_id);
2760       }
2761       // At this point, if we're in the concurrent phase of
2762       // marking, everything has been re-initialized and we're
2763       // ready to restart.
2764     }
2765   }
2766 }
2767 
2768 G1CMTask::G1CMTask(uint worker_id,
2769                    G1ConcurrentMark* cm,
2770                    G1CMTaskQueue* task_queue,
2771                    G1RegionMarkStats* mark_stats,
2772                    uint max_regions) :
2773   _objArray_processor(this),
2774   _worker_id(worker_id),
2775   _g1h(G1CollectedHeap::heap()),
2776   _cm(cm),
2777   _next_mark_bitmap(NULL),
2778   _task_queue(task_queue),
2779   _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize),
2780   _calls(0),
2781   _time_target_ms(0.0),
2782   _start_time_ms(0.0),
2783   _cm_oop_closure(NULL),
2784   _curr_region(NULL),
2785   _finger(NULL),
2786   _region_limit(NULL),
2787   _words_scanned(0),
2788   _words_scanned_limit(0),
2789   _real_words_scanned_limit(0),
2790   _refs_reached(0),
2791   _refs_reached_limit(0),
2792   _real_refs_reached_limit(0),
2793   _hash_seed(17),
2794   _has_aborted(false),
2795   _has_timed_out(false),
2796   _draining_satb_buffers(false),
2797   _step_times_ms(),
2798   _elapsed_time_ms(0.0),
2799   _termination_time_ms(0.0),
2800   _termination_start_time_ms(0.0),
2801   _marking_step_diffs_ms()
2802 {
2803   guarantee(task_queue != NULL, "invariant");
2804 
2805   _marking_step_diffs_ms.add(0.5); // Start the step-time diff statistics with an initial 0.5 ms sample.
2806 }
2807 
2808 // These formatting macros are used below to ensure consistent
2809 // output. The *_H_* versions format the header for a particular
2810 // value and should be kept consistent with the corresponding
2811 // value macro. Also note that most of the macros add the
2812 // necessary white space (as a prefix), which makes them a bit
2813 // easier to compose; see the example expansion after the definitions.
2814 
2815 // All the output lines are prefixed with this string so that they can
2816 // be easily identified in a large log file.
2817 #define G1PPRL_LINE_PREFIX            "###"
2818 
2819 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2820 #ifdef _LP64
2821 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2822 #else // _LP64
2823 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2824 #endif // _LP64
2825 
2826 // For per-region info
2827 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2828 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2829 #define G1PPRL_STATE_FORMAT           "   %-5s"
2830 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2831 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
2832 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2833 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
2834 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
2835 
2836 // For summary info
2837 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2838 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
2839 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2840 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
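     // For example, after string concatenation G1PPRL_SUM_MB_FORMAT("remset")
     // expands to "  remset: %1.2f MB", and G1PPRL_SUM_MB_PERC_FORMAT("used")
     // to "  used: %1.2f MB / %1.2f %%"; the summary line in the destructor
     // below is built by concatenating such fragments.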
2841 
2842 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2843   _total_used_bytes(0), _total_capacity_bytes(0),
2844   _total_prev_live_bytes(0), _total_next_live_bytes(0),
2845   _total_remset_bytes(0), _total_strong_code_roots_bytes(0)
2846 {
2847   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2848   MemRegion g1_reserved = g1h->g1_reserved();
2849   double now = os::elapsedTime();
2850 
2851   // Print the header of the output.
2852   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
2853   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
2854                           G1PPRL_SUM_ADDR_FORMAT("reserved")
2855                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
2856                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
2857                           HeapRegion::GrainBytes);
2858   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2859   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2860                           G1PPRL_TYPE_H_FORMAT
2861                           G1PPRL_ADDR_BASE_H_FORMAT
2862                           G1PPRL_BYTE_H_FORMAT
2863                           G1PPRL_BYTE_H_FORMAT
2864                           G1PPRL_BYTE_H_FORMAT
2865                           G1PPRL_DOUBLE_H_FORMAT
2866                           G1PPRL_BYTE_H_FORMAT
2867                           G1PPRL_STATE_H_FORMAT
2868                           G1PPRL_BYTE_H_FORMAT,
2869                           "type", "address-range",
2870                           "used", "prev-live", "next-live", "gc-eff",
2871                           "remset", "state", "code-roots");
2872   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2873                           G1PPRL_TYPE_H_FORMAT
2874                           G1PPRL_ADDR_BASE_H_FORMAT
2875                           G1PPRL_BYTE_H_FORMAT
2876                           G1PPRL_BYTE_H_FORMAT
2877                           G1PPRL_BYTE_H_FORMAT
2878                           G1PPRL_DOUBLE_H_FORMAT
2879                           G1PPRL_BYTE_H_FORMAT
2880                           G1PPRL_STATE_H_FORMAT
2881                           G1PPRL_BYTE_H_FORMAT,
2882                           "", "",
2883                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
2884                           "(bytes)", "", "(bytes)");
2885 }
2886 
2887 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) {
2888   const char* type       = r->get_type_str();
2889   HeapWord* bottom       = r->bottom();
2890   HeapWord* end          = r->end();
2891   size_t capacity_bytes  = r->capacity();
2892   size_t used_bytes      = r->used();
2893   size_t prev_live_bytes = r->live_bytes();
2894   size_t next_live_bytes = r->next_live_bytes();
2895   double gc_eff          = r->gc_efficiency();
2896   size_t remset_bytes    = r->rem_set()->mem_size();
2897   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
2898   const char* remset_type = r->rem_set()->get_short_state_str();
2899 
2900   _total_used_bytes      += used_bytes;
2901   _total_capacity_bytes  += capacity_bytes;
2902   _total_prev_live_bytes += prev_live_bytes;
2903   _total_next_live_bytes += next_live_bytes;
2904   _total_remset_bytes    += remset_bytes;
2905   _total_strong_code_roots_bytes += strong_code_roots_bytes;
2906 
2907   // Print a line for this particular region.
2908   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2909                           G1PPRL_TYPE_FORMAT
2910                           G1PPRL_ADDR_BASE_FORMAT
2911                           G1PPRL_BYTE_FORMAT
2912                           G1PPRL_BYTE_FORMAT
2913                           G1PPRL_BYTE_FORMAT
2914                           G1PPRL_DOUBLE_FORMAT
2915                           G1PPRL_BYTE_FORMAT
2916                           G1PPRL_STATE_FORMAT
2917                           G1PPRL_BYTE_FORMAT,
2918                           type, p2i(bottom), p2i(end),
2919                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
2920                           remset_bytes, remset_type, strong_code_roots_bytes);
2921 
2922   return false; // Do not stop the iteration; visit the remaining regions too.
2923 }
2924 
2925 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
2926   // Add the static memory usage to the remembered set sizes.
2927   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
2928   // Print the footer of the output.
2929   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2930   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2931                          " SUMMARY"
2932                          G1PPRL_SUM_MB_FORMAT("capacity")
2933                          G1PPRL_SUM_MB_PERC_FORMAT("used")
2934                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
2935                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
2936                          G1PPRL_SUM_MB_FORMAT("remset")
2937                          G1PPRL_SUM_MB_FORMAT("code-roots"),
2938                          bytes_to_mb(_total_capacity_bytes),
2939                          bytes_to_mb(_total_used_bytes),
2940                          percent_of(_total_used_bytes, _total_capacity_bytes),
2941                          bytes_to_mb(_total_prev_live_bytes),
2942                          percent_of(_total_prev_live_bytes, _total_capacity_bytes),
2943                          bytes_to_mb(_total_next_live_bytes),
2944                          percent_of(_total_next_live_bytes, _total_capacity_bytes),
2945                          bytes_to_mb(_total_remset_bytes),
2946                          bytes_to_mb(_total_strong_code_roots_bytes));
2947 }