1 /*
   2  * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/g1BarrierSet.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorState.hpp"
  32 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  33 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  34 #include "gc/g1/g1HeapVerifier.hpp"
  35 #include "gc/g1/g1OopClosures.inline.hpp"
  36 #include "gc/g1/g1Policy.hpp"
  37 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/heapRegion.inline.hpp"
  40 #include "gc/g1/heapRegionRemSet.hpp"
  41 #include "gc/g1/heapRegionSet.inline.hpp"
  42 #include "gc/shared/adaptiveSizePolicy.hpp"
  43 #include "gc/shared/gcId.hpp"
  44 #include "gc/shared/gcTimer.hpp"
  45 #include "gc/shared/gcTrace.hpp"
  46 #include "gc/shared/gcTraceTime.inline.hpp"
  47 #include "gc/shared/genOopClosures.inline.hpp"
  48 #include "gc/shared/referencePolicy.hpp"
  49 #include "gc/shared/strongRootsScope.hpp"
  50 #include "gc/shared/suspendibleThreadSet.hpp"
  51 #include "gc/shared/taskqueue.inline.hpp"
  52 #include "gc/shared/vmGCOperations.hpp"
  53 #include "gc/shared/weakProcessor.hpp"
  54 #include "include/jvm.h"
  55 #include "logging/log.hpp"
  56 #include "memory/allocation.hpp"
  57 #include "memory/resourceArea.hpp"
  58 #include "oops/access.inline.hpp"
  59 #include "oops/oop.inline.hpp"
  60 #include "runtime/atomic.hpp"
  61 #include "runtime/handles.inline.hpp"
  62 #include "runtime/java.hpp"
  63 #include "runtime/prefetch.inline.hpp"
  64 #include "services/memTracker.hpp"
  65 #include "utilities/align.hpp"
  66 #include "utilities/growableArray.hpp"
  67 
  68 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  69   assert(addr < _cm->finger(), "invariant");
  70   assert(addr >= _task->finger(), "invariant");
  71 
  72   // We move that task's local finger along.
  73   _task->move_finger_to(addr);
  74 
  75   _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  76   // we only partially drain the local queue and global stack
  77   _task->drain_local_queue(true);
  78   _task->drain_global_stack(true);
  79 
  80   // if the has_aborted flag has been raised, we need to bail out of
  81   // the iteration
  82   return !_task->has_aborted();
  83 }
  84 
  85 G1CMMarkStack::G1CMMarkStack() :
  86   _max_chunk_capacity(0),
  87   _base(NULL),
  88   _chunk_capacity(0) {
  89   set_empty();
  90 }
  91 
  92 bool G1CMMarkStack::resize(size_t new_capacity) {
  93   assert(is_empty(), "Only resize when stack is empty.");
  94   assert(new_capacity <= _max_chunk_capacity,
  95          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
  96 
  97   TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);
  98 
  99   if (new_base == NULL) {
 100     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 101     return false;
 102   }
 103   // Release old mapping.
 104   if (_base != NULL) {
 105     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 106   }
 107 
 108   _base = new_base;
 109   _chunk_capacity = new_capacity;
 110   set_empty();
 111 
 112   return true;
 113 }
 114 
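     // Alignment of mark stack capacities, in number of G1TaskQueueEntry elements: the least
     // common multiple of the VM allocation granularity and the chunk size, so that the chunk
     // array's backing memory is always a whole multiple of the allocation granularity.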
 115 size_t G1CMMarkStack::capacity_alignment() {
 116   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 117 }
 118 
 119 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 120   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 121 
 122   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 123 
 124   _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 125   size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 126 
 127   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 128             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 129             _max_chunk_capacity,
 130             initial_chunk_capacity);
 131 
 132   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 133                 initial_chunk_capacity, _max_chunk_capacity);
 134 
 135   return resize(initial_chunk_capacity);
 136 }
 137 
 138 void G1CMMarkStack::expand() {
 139   if (_chunk_capacity == _max_chunk_capacity) {
 140     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 141     return;
 142   }
 143   size_t old_capacity = _chunk_capacity;
 144   // Double capacity if possible
 145   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 146 
 147   if (resize(new_capacity)) {
 148     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 149                   old_capacity, new_capacity);
 150   } else {
 151     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 152                     old_capacity, new_capacity);
 153   }
 154 }
 155 
 156 G1CMMarkStack::~G1CMMarkStack() {
 157   if (_base != NULL) {
 158     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 159   }
 160 }
 161 
 162 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 163   elem->next = *list;
 164   *list = elem;
 165 }
 166 
 167 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 168   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 169   add_chunk_to_list(&_chunk_list, elem);
 170   _chunks_in_chunk_list++;
 171 }
 172 
 173 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 174   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 175   add_chunk_to_list(&_free_list, elem);
 176 }
 177 
 178 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 179   TaskQueueEntryChunk* result = *list;
 180   if (result != NULL) {
 181     *list = (*list)->next;
 182   }
 183   return result;
 184 }
 185 
 186 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 187   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 188   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 189   if (result != NULL) {
 190     _chunks_in_chunk_list--;
 191   }
 192   return result;
 193 }
 194 
 195 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 196   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 197   return remove_chunk_from_list(&_free_list);
 198 }
 199 
 200 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
 201   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 202   // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 203   // wraparound of _hwm.
 204   if (_hwm >= _chunk_capacity) {
 205     return NULL;
 206   }
 207 
 208   size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
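       // The dirty read above may have let several threads past; re-check the index that was
       // actually claimed against the capacity before using it.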
 209   if (cur_idx >= _chunk_capacity) {
 210     return NULL;
 211   }
 212 
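       // Default-construct the chunk in place in the pre-reserved backing array.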
 213   TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
 214   result->next = NULL;
 215   return result;
 216 }
 217 
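     // Push a full buffer of EntriesPerChunk entries onto the global stack as a single chunk,
     // reusing a chunk from the free list if possible. Returns false if no chunk could be
     // obtained from either the free list or the backing memory.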
 218 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 219   // Get a new chunk.
 220   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 221 
 222   if (new_chunk == NULL) {
 223     // Did not get a chunk from the free list. Allocate from backing memory.
 224     new_chunk = allocate_new_chunk();
 225 
 226     if (new_chunk == NULL) {
 227       return false;
 228     }
 229   }
 230 
 231   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 232 
 233   add_chunk_to_chunk_list(new_chunk);
 234 
 235   return true;
 236 }
 237 
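     // Pop one chunk of EntriesPerChunk entries from the global stack into ptr_arr and recycle
     // the chunk onto the free list. Returns false if the chunk list is empty.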
 238 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 239   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 240 
 241   if (cur == NULL) {
 242     return false;
 243   }
 244 
 245   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 246 
 247   add_chunk_to_free_list(cur);
 248   return true;
 249 }
 250 
 251 void G1CMMarkStack::set_empty() {
 252   _chunks_in_chunk_list = 0;
 253   _hwm = 0;
 254   _chunk_list = NULL;
 255   _free_list = NULL;
 256 }
 257 
 258 G1CMRootRegions::G1CMRootRegions() :
 259   _survivors(NULL), _cm(NULL), _scan_in_progress(false),
 260   _should_abort(false), _claimed_survivor_index(0) { }
 261 
 262 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 263   _survivors = survivors;
 264   _cm = cm;
 265 }
 266 
 267 void G1CMRootRegions::prepare_for_scan() {
 268   assert(!scan_in_progress(), "pre-condition");
 269 
 270   // Currently, only survivors can be root regions.
 271   _claimed_survivor_index = 0;
 272   _scan_in_progress = _survivors->regions()->is_nonempty();
 273   _should_abort = false;
 274 }
 275 
 276 HeapRegion* G1CMRootRegions::claim_next() {
 277   if (_should_abort) {
 278     // If someone has set the should_abort flag, we return NULL to
 279     // force the caller to bail out of their loop.
 280     return NULL;
 281   }
 282 
 283   // Currently, only survivors can be root regions.
 284   const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
 285 
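       // Atomically claim the next survivor region index; each index is handed out to exactly
       // one worker, so every root region is scanned exactly once.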
 286   int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
 287   if (claimed_index < survivor_regions->length()) {
 288     return survivor_regions->at(claimed_index);
 289   }
 290   return NULL;
 291 }
 292 
 293 uint G1CMRootRegions::num_root_regions() const {
 294   return (uint)_survivors->regions()->length();
 295 }
 296 
 297 void G1CMRootRegions::notify_scan_done() {
 298   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 299   _scan_in_progress = false;
 300   RootRegionScan_lock->notify_all();
 301 }
 302 
 303 void G1CMRootRegions::cancel_scan() {
 304   notify_scan_done();
 305 }
 306 
 307 void G1CMRootRegions::scan_finished() {
 308   assert(scan_in_progress(), "pre-condition");
 309 
 310   // Currently, only survivors can be root regions.
 311   if (!_should_abort) {
 312     assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
 313     assert((uint)_claimed_survivor_index >= _survivors->length(),
 314            "we should have claimed all survivors, claimed index = %u, length = %u",
 315            (uint)_claimed_survivor_index, _survivors->length());
 316   }
 317 
 318   notify_scan_done();
 319 }
 320 
 321 bool G1CMRootRegions::wait_until_scan_finished() {
 322   if (!scan_in_progress()) {
 323     return false;
 324   }
 325 
 326   {
 327     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 328     while (scan_in_progress()) {
 329       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 330     }
 331   }
 332   return true;
 333 }
 334 
 335 // Returns the maximum number of workers to be used in a concurrent
 336 // phase based on the number of GC workers being used in a STW
 337 // phase.
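     // For example, 8 STW GC workers yield (8 + 2) / 4 = 2 concurrent marking workers.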
 338 static uint scale_concurrent_worker_threads(uint num_gc_workers) {
 339   return MAX2((num_gc_workers + 2) / 4, 1U);
 340 }
 341 
 342 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 343                                    G1RegionToSpaceMapper* prev_bitmap_storage,
 344                                    G1RegionToSpaceMapper* next_bitmap_storage) :
 345   // _cm_thread set inside the constructor
 346   _g1h(g1h),
 347   _completed_initialization(false),
 348 
 349   _mark_bitmap_1(),
 350   _mark_bitmap_2(),
 351   _prev_mark_bitmap(&_mark_bitmap_1),
 352   _next_mark_bitmap(&_mark_bitmap_2),
 353 
 354   _heap(_g1h->reserved_region()),
 355 
 356   _root_regions(),
 357 
 358   _global_mark_stack(),
 359 
 360   // _finger set in set_non_marking_state
 361 
 362   _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
 363   _max_num_tasks(ParallelGCThreads),
 364   // _num_active_tasks set in set_non_marking_state()
 365   // _tasks set inside the constructor
 366 
 367   _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
 368   _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),
 369 
 370   _first_overflow_barrier_sync(),
 371   _second_overflow_barrier_sync(),
 372 
 373   _has_overflown(false),
 374   _concurrent(false),
 375   _has_aborted(false),
 376   _restart_for_overflow(false),
 377   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 378   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 379 
 380   // _verbose_level set below
 381 
 382   _init_times(),
 383   _remark_times(),
 384   _remark_mark_times(),
 385   _remark_weak_ref_times(),
 386   _cleanup_times(),
 387   _total_cleanup_time(0.0),
 388 
 389   _accum_task_vtime(NULL),
 390 
 391   _concurrent_workers(NULL),
 392   _num_concurrent_workers(0),
 393   _max_concurrent_workers(0),
 394 
 395   _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
 396   _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
 397 {
 398   _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 399   _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);
 400 
 401   // Create & start ConcurrentMark thread.
 402   _cm_thread = new G1ConcurrentMarkThread(this);
 403   if (_cm_thread->osthread() == NULL) {
 404     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 405   }
 406 
 407   assert(CGC_lock != NULL, "CGC_lock must be initialized");
 408 
 409   SATBMarkQueueSet& satb_qs = G1BarrierSet::satb_mark_queue_set();
 410   satb_qs.set_buffer_size(G1SATBBufferSize);
 411 
 412   _root_regions.init(_g1h->survivor(), this);
 413 
 414   if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
 415     // Calculate the number of concurrent worker threads by scaling
 416     // the number of parallel GC threads.
 417     uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
 418     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 419   }
 420 
 421   assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
 422   if (ConcGCThreads > ParallelGCThreads) {
 423     log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
 424                     ConcGCThreads, ParallelGCThreads);
 425     return;
 426   }
 427 
 428   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 429   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 430 
 431   _num_concurrent_workers = ConcGCThreads;
 432   _max_concurrent_workers = _num_concurrent_workers;
 433 
 434   _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
 435   _concurrent_workers->initialize_workers();
 436 
 437   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 438     size_t mark_stack_size =
 439       MIN2(MarkStackSizeMax,
 440           MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
 441     // Verify that the calculated value for MarkStackSize is in range.
 442     // It would be nice to use the private utility routine from Arguments.
 443     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 444       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 445                       "must be between 1 and " SIZE_FORMAT,
 446                       mark_stack_size, MarkStackSizeMax);
 447       return;
 448     }
 449     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 450   } else {
 451     // Verify MarkStackSize is in range.
 452     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 453       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 454         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 455           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 456                           "must be between 1 and " SIZE_FORMAT,
 457                           MarkStackSize, MarkStackSizeMax);
 458           return;
 459         }
 460       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 461         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 462           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 463                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 464                           MarkStackSize, MarkStackSizeMax);
 465           return;
 466         }
 467       }
 468     }
 469   }
 470 
 471   if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
 472     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 473   }
 474 
 475   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 476   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);
 477 
 478   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 479   _num_active_tasks = _max_num_tasks;
 480 
 481   for (uint i = 0; i < _max_num_tasks; ++i) {
 482     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 483     task_queue->initialize();
 484     _task_queues->register_queue(i, task_queue);
 485 
 486     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());
 487 
 488     _accum_task_vtime[i] = 0.0;
 489   }
 490 
 491   reset_at_marking_complete();
 492   _completed_initialization = true;
 493 }
 494 
 495 void G1ConcurrentMark::reset() {
 496   _has_aborted = false;
 497 
 498   reset_marking_for_restart();
 499 
 500   // Reset all tasks, since different phases will use a different number of active
 501   // threads. So, it's easiest to have all of them ready.
 502   for (uint i = 0; i < _max_num_tasks; ++i) {
 503     _tasks[i]->reset(_next_mark_bitmap);
 504   }
 505 
 506   uint max_regions = _g1h->max_regions();
 507   for (uint i = 0; i < max_regions; i++) {
 508     _top_at_rebuild_starts[i] = NULL;
 509     _region_mark_stats[i].clear();
 510   }
 511 }
 512 
 513 void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
 514   for (uint j = 0; j < _max_num_tasks; ++j) {
 515     _tasks[j]->clear_mark_stats_cache(region_idx);
 516   }
 517   _top_at_rebuild_starts[region_idx] = NULL;
 518   _region_mark_stats[region_idx].clear();
 519 }
 520 
 521 void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
 522   uint const region_idx = r->hrm_index();
 523   if (r->is_humongous()) {
 524     assert(r->is_starts_humongous(), "Got a continues humongous region here");
 525     uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
 526     for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
 527       clear_statistics_in_region(j);
 528     }
 529   } else {
 530     clear_statistics_in_region(region_idx);
 531   }
 532 }
 533 
 534 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
 535   assert_at_safepoint_on_vm_thread();
 536 
 537   // Need to clear mark bit of the humongous object.
 538   if (_next_mark_bitmap->is_marked(r->bottom())) {
 539     _next_mark_bitmap->clear(r->bottom());
 540   }
 541 
 542   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
 543     return;
 544   }
 545 
 546   // Clear any statistics about the region gathered so far.
 547   clear_statistics(r);
 548 }
 549 
 550 void G1ConcurrentMark::reset_marking_for_restart() {
 551   _global_mark_stack.set_empty();
 552 
 553   // Expand the marking stack, if we have to and if we can.
 554   if (has_overflown()) {
 555     _global_mark_stack.expand();
 556 
 557     uint max_regions = _g1h->max_regions();
 558     for (uint i = 0; i < max_regions; i++) {
 559       _region_mark_stats[i].clear_during_overflow();
 560     }
 561   }
 562 
 563   clear_has_overflown();
 564   _finger = _heap.start();
 565 
 566   for (uint i = 0; i < _max_num_tasks; ++i) {
 567     G1CMTaskQueue* queue = _task_queues->queue(i);
 568     queue->set_empty();
 569   }
 570 }
 571 
 572 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 573   assert(active_tasks <= _max_num_tasks, "we should not have more");
 574 
 575   _num_active_tasks = active_tasks;
 576   // Need to update the three data structures below according to the
 577   // number of active threads for this phase.
 578   _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
 579   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 580   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 581 }
 582 
 583 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 584   set_concurrency(active_tasks);
 585 
 586   _concurrent = concurrent;
 587 
 588   if (!concurrent) {
 589     // At this point we should be in a STW phase, and completed marking.
 590     assert_at_safepoint_on_vm_thread();
 591     assert(out_of_regions(),
 592            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 593            p2i(_finger), p2i(_heap.end()));
 594   }
 595 }
 596 
 597 void G1ConcurrentMark::reset_at_marking_complete() {
 598   // We set the global marking state to some default values when we're
 599   // not doing marking.
 600   reset_marking_for_restart();
 601   _num_active_tasks = 0;
 602 }
 603 
 604 G1ConcurrentMark::~G1ConcurrentMark() {
 605   FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
 606   FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
 607   // The G1ConcurrentMark instance is never freed.
 608   ShouldNotReachHere();
 609 }
 610 
 611 class G1ClearBitMapTask : public AbstractGangTask {
 612 public:
 613   static size_t chunk_size() { return M; }
 614 
 615 private:
 616   // Heap region closure used for clearing the given mark bitmap.
 617   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 618   private:
 619     G1CMBitMap* _bitmap;
 620     G1ConcurrentMark* _cm;
 621   public:
 622     G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
 623     }
 624 
 625     virtual bool do_heap_region(HeapRegion* r) {
 626       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 627 
 628       HeapWord* cur = r->bottom();
 629       HeapWord* const end = r->end();
 630 
 631       while (cur < end) {
 632         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 633         _bitmap->clear_range(mr);
 634 
 635         cur += chunk_size_in_words;
 636 
 637         // Abort iteration if after yielding the marking has been aborted.
 638         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 639           return true;
 640         }
 641         // Repeat the asserts from before the start of the closure. We will do them
 642         // as asserts here to minimize their overhead on the product. However, we
 643         // will have them as guarantees at the beginning / end of the bitmap
 644         // clearing to get some checking in the product.
 645         assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
 646         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 647       }
 648       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 649 
 650       return false;
 651     }
 652   };
 653 
 654   G1ClearBitmapHRClosure _cl;
 655   HeapRegionClaimer _hr_claimer;
 656   bool _suspendible; // If the task is suspendible, workers must join the STS.
 657 
 658 public:
 659   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 660     AbstractGangTask("G1 Clear Bitmap"),
 661     _cl(bitmap, suspendible ? cm : NULL),
 662     _hr_claimer(n_workers),
 663     _suspendible(suspendible)
 664   { }
 665 
 666   void work(uint worker_id) {
 667     SuspendibleThreadSetJoiner sts_join(_suspendible);
 668     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 669   }
 670 
 671   bool is_complete() {
 672     return _cl.is_complete();
 673   }
 674 };
 675 
 676 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 677   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 678 
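       // Determine the number of chunk_size() byte pieces of the bitmap that need clearing;
       // this work-unit count is used to size the number of workers below.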
 679   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 680   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 681 
 682   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 683 
 684   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 685 
 686   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 687   workers->run_task(&cl, num_workers);
 688   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 689 }
 690 
 691 void G1ConcurrentMark::cleanup_for_next_mark() {
 692   // Make sure that the concurrent mark thread looks to still be in
 693   // the current cycle.
 694   guarantee(cm_thread()->during_cycle(), "invariant");
 695 
 696   // We are finishing up the current cycle by clearing the next
 697   // marking bitmap and getting it ready for the next cycle. During
 698   // this time no other cycle can start. So, let's make sure that this
 699   // is the case.
 700   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 701 
 702   clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);
 703 
 704   // Repeat the asserts from above.
 705   guarantee(cm_thread()->during_cycle(), "invariant");
 706   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 707 }
 708 
 709 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 710   assert_at_safepoint_on_vm_thread();
 711   clear_bitmap(_prev_mark_bitmap, workers, false);
 712 }
 713 
 714 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 715   G1CMBitMap* _bitmap;
 716  public:
 717   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 718   }
 719 
 720   virtual bool do_heap_region(HeapRegion* r) {
 721     // This closure can be called concurrently with the mutator, so we must make sure
 722     // that the result of the get_next_marked_addr() call is compared to the
 723     // limit value passed to it in order to detect any found bits.
 724     // end never changes in G1.
 725     HeapWord* end = r->end();
 726     return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
 727   }
 728 };
 729 
 730 bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
 731   CheckBitmapClearHRClosure cl(_next_mark_bitmap);
 732   _g1h->heap_region_iterate(&cl);
 733   return cl.is_complete();
 734 }
 735 
 736 class NoteStartOfMarkHRClosure : public HeapRegionClosure {
 737 public:
 738   bool do_heap_region(HeapRegion* r) {
 739     r->note_start_of_marking();
 740     return false;
 741   }
 742 };
 743 
 744 void G1ConcurrentMark::pre_initial_mark() {
 745   // Initialize marking structures. This has to be done in a STW phase.
 746   reset();
 747 
 748   // For each region note start of marking.
 749   NoteStartOfMarkHRClosure startcl;
 750   _g1h->heap_region_iterate(&startcl);
 751 }
 752 
 753 
 754 void G1ConcurrentMark::post_initial_mark() {
 755   // Start Concurrent Marking weak-reference discovery.
 756   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 757   // enable ("weak") refs discovery
 758   rp->enable_discovery();
 759   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 760 
 761   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
 762   // This is the start of the marking cycle; we expect all
 763   // threads to have SATB queues with active set to false.
 764   satb_mq_set.set_active_all_threads(true, /* new active value */
 765                                      false /* expected_active */);
 766 
 767   _root_regions.prepare_for_scan();
 768 
 769   // update_g1_committed() will be called at the end of an evac pause
 770   // when marking is on. So, it's also called at the end of the
 771   // initial-mark pause to update the heap end, if the heap expands
 772   // during it. No need to call it here.
 773 }
 774 
 775 /*
 776  * Notice that in the next two methods, we actually leave the STS
 777  * during the barrier sync and join it immediately afterwards. If we
 778  * do not do this, the following deadlock can occur: one thread could
 779  * be in the barrier sync code, waiting for the other thread to also
 780  * sync up, whereas another one could be trying to yield, while also
 781  * waiting for the other threads to sync up too.
 782  *
 783  * Note, however, that this code is also used during remark and in
 784  * this case we should not attempt to leave / enter the STS, otherwise
 785  * we'll either hit an assert (debug / fastdebug) or deadlock
 786  * (product). So we should only leave / enter the STS if we are
 787  * operating concurrently.
 788  *
 789  * Because the thread that does the sync barrier has left the STS, it
 790  * is possible that a Full GC or an evacuation pause could occur while
 791  * it is suspended. This is actually safe, since entering the sync
 792  * barrier is one of the last things do_marking_step() does, and it
 793  * doesn't manipulate any data structures afterwards.
 794  */
 795 
 796 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 797   bool barrier_aborted;
 798   {
 799     SuspendibleThreadSetLeaver sts_leave(concurrent());
 800     barrier_aborted = !_first_overflow_barrier_sync.enter();
 801   }
 802 
 803   // at this point everyone should have synced up and not be doing any
 804   // more work
 805 
 806   if (barrier_aborted) {
 807     // If the barrier aborted we ignore the overflow condition and
 808     // just abort the whole marking phase as quickly as possible.
 809     return;
 810   }
 811 }
 812 
 813 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 814   SuspendibleThreadSetLeaver sts_leave(concurrent());
 815   _second_overflow_barrier_sync.enter();
 816 
 817   // at this point everything should be re-initialized and ready to go
 818 }
 819 
 820 class G1CMConcurrentMarkingTask : public AbstractGangTask {
 821   G1ConcurrentMark*     _cm;
 822 
 823 public:
 824   void work(uint worker_id) {
 825     assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
 826     ResourceMark rm;
 827 
 828     double start_vtime = os::elapsedVTime();
 829 
 830     {
 831       SuspendibleThreadSetJoiner sts_join;
 832 
 833       assert(worker_id < _cm->active_tasks(), "invariant");
 834 
 835       G1CMTask* task = _cm->task(worker_id);
 836       task->record_start_time();
 837       if (!_cm->has_aborted()) {
 838         do {
 839           task->do_marking_step(G1ConcMarkStepDurationMillis,
 840                                 true  /* do_termination */,
 841                                 false /* is_serial*/);
 842 
 843           _cm->do_yield_check();
 844         } while (!_cm->has_aborted() && task->has_aborted());
 845       }
 846       task->record_end_time();
 847       guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
 848     }
 849 
 850     double end_vtime = os::elapsedVTime();
 851     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 852   }
 853 
 854   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
 855       AbstractGangTask("Concurrent Mark"), _cm(cm) { }
 856 
 857   ~G1CMConcurrentMarkingTask() { }
 858 };
 859 
 860 uint G1ConcurrentMark::calc_active_marking_workers() {
 861   uint result = 0;
 862   if (!UseDynamicNumberOfGCThreads ||
 863       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 864        !ForceDynamicNumberOfGCThreads)) {
 865     result = _max_concurrent_workers;
 866   } else {
 867     result =
 868       AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
 869                                                       1, /* Minimum workers */
 870                                                       _num_concurrent_workers,
 871                                                       Threads::number_of_non_daemon_threads());
 872     // Don't scale the result down by scale_concurrent_worker_threads() because
 873     // that scaling has already gone into "_max_concurrent_workers".
 874   }
 875   assert(result > 0 && result <= _max_concurrent_workers,
 876          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
 877          _max_concurrent_workers, result);
 878   return result;
 879 }
 880 
 881 void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) {
 882   // Currently, only survivors can be root regions.
 883   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 884   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 885 
 886   const uintx interval = PrefetchScanIntervalInBytes;
 887   HeapWord* curr = hr->bottom();
 888   const HeapWord* end = hr->top();
 889   while (curr < end) {
 890     Prefetch::read(curr, interval);
 891     oop obj = oop(curr);
 892     int size = obj->oop_iterate_size(&cl);
 893     assert(size == obj->size(), "sanity");
 894     curr += size;
 895   }
 896 }
 897 
 898 class G1CMRootRegionScanTask : public AbstractGangTask {
 899   G1ConcurrentMark* _cm;
 900 public:
 901   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 902     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
 903 
 904   void work(uint worker_id) {
 905     assert(Thread::current()->is_ConcurrentGC_thread(),
 906            "this should only be done by a conc GC thread");
 907 
 908     G1CMRootRegions* root_regions = _cm->root_regions();
 909     HeapRegion* hr = root_regions->claim_next();
 910     while (hr != NULL) {
 911       _cm->scan_root_region(hr, worker_id);
 912       hr = root_regions->claim_next();
 913     }
 914   }
 915 };
 916 
 917 void G1ConcurrentMark::scan_root_regions() {
 918   // scan_in_progress() will have been set to true only if there was
 919   // at least one root region to scan. So, if it's false, we
 920   // should not attempt to do any further work.
 921   if (root_regions()->scan_in_progress()) {
 922     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 923 
 924     _num_concurrent_workers = MIN2(calc_active_marking_workers(),
 925                                    // We distribute work on a per-region basis, so starting
 926                                    // more threads than that is useless.
 927                                    root_regions()->num_root_regions());
 928     assert(_num_concurrent_workers <= _max_concurrent_workers,
 929            "Maximum number of marking threads exceeded");
 930 
 931     G1CMRootRegionScanTask task(this);
 932     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
 933                         task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
 934     _concurrent_workers->run_task(&task, _num_concurrent_workers);
 935 
 936     // It's possible that has_aborted() is true here without actually
 937     // aborting the survivor scan earlier. This is OK as it's
 938     // mainly used for sanity checking.
 939     root_regions()->scan_finished();
 940   }
 941 }
 942 
 943 void G1ConcurrentMark::concurrent_cycle_start() {
 944   _gc_timer_cm->register_gc_start();
 945 
 946   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
 947 
 948   _g1h->trace_heap_before_gc(_gc_tracer_cm);
 949 }
 950 
 951 void G1ConcurrentMark::concurrent_cycle_end() {
 952   _g1h->collector_state()->set_clearing_next_bitmap(false);
 953 
 954   _g1h->trace_heap_after_gc(_gc_tracer_cm);
 955 
 956   if (has_aborted()) {
 957     log_info(gc, marking)("Concurrent Mark Abort");
 958     _gc_tracer_cm->report_concurrent_mode_failure();
 959   }
 960 
 961   _gc_timer_cm->register_gc_end();
 962 
 963   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
 964 }
 965 
 966 void G1ConcurrentMark::mark_from_roots() {
 967   _restart_for_overflow = false;
 968 
 969   _num_concurrent_workers = calc_active_marking_workers();
 970 
 971   uint active_workers = MAX2(1U, _num_concurrent_workers);
 972 
 973   // Setting active workers is not guaranteed since fewer
 974   // worker threads may currently exist and more may not be
 975   // available.
 976   active_workers = _concurrent_workers->update_active_workers(active_workers);
 977   log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());
 978 
 979   // Parallel task terminator is set in "set_concurrency_and_phase()"
 980   set_concurrency_and_phase(active_workers, true /* concurrent */);
 981 
 982   G1CMConcurrentMarkingTask marking_task(this);
 983   _concurrent_workers->run_task(&marking_task);
 984   print_stats();
 985 }
 986 
 987 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
 988   G1HeapVerifier* verifier = _g1h->verifier();
 989 
 990   verifier->verify_region_sets_optional();
 991 
 992   if (VerifyDuringGC) {
 993     GCTraceTime(Debug, gc, phases) trace(caller, _gc_timer_cm);
 994 
 995     size_t const BufLen = 512;
 996     char buffer[BufLen];
 997 
 998     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
 999     verifier->verify(type, vo, buffer);
1000   }
1001 
1002   verifier->check_bitmaps(caller);
1003 }
1004 
1005 class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
1006   G1CollectedHeap* _g1h;
1007   G1ConcurrentMark* _cm;
1008 
1009   uint _num_regions_selected_for_rebuild;  // The number of regions actually selected for rebuild.
1010 
1011   void update_remset_before_rebuild(HeapRegion * hr) {
1012     G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker();
1013 
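         // Liveness as determined by marking, in bytes; the tracking policy uses it to decide
         // whether the region's remembered set should be rebuilt.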
1014     size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize;
1015     bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
1016     if (selected_for_rebuild) {
1017       _num_regions_selected_for_rebuild++;
1018     }
1019     _cm->update_top_at_rebuild_start(hr);
1020   }
1021 
1022 public:
1023   G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm) :
1024     _g1h(g1h), _cm(cm), _num_regions_selected_for_rebuild(0) { }
1025 
1026   virtual bool do_heap_region(HeapRegion* r) {
1027     update_remset_before_rebuild(r);
1028     return false;
1029   }
1030 
1031   uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
1032 };
1033 
1034 class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
1035   G1CollectedHeap* _g1h;
1036 public:
1037   G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1038 
1039   virtual bool do_heap_region(HeapRegion* r) {
1040     _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r);
1041     return false;
1042   }
1043 };
1044 
1045 void G1ConcurrentMark::remark() {
1046   assert_at_safepoint_on_vm_thread();
1047 
1048   // If a full collection has happened, we should not continue. However we might
1049   // have ended up here as the Remark VM operation has been scheduled already.
1050   if (has_aborted()) {
1051     return;
1052   }
1053 
1054   G1Policy* g1p = _g1h->g1_policy();
1055   g1p->record_concurrent_mark_remark_start();
1056 
1057   double start = os::elapsedTime();
1058 
1059   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");
1060 
1061   {
1062     GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1063     finalize_marking();
1064   }
1065 
1066   double mark_work_end = os::elapsedTime();
1067 
1068   bool const mark_finished = !has_overflown();
1069   if (mark_finished) {
1070     weak_refs_work(false /* clear_all_soft_refs */);
1071 
1072     SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1073     // We're done with marking.
1074     // This is the end of the marking cycle; we expect all
1075     // threads to have SATB queues with active set to true.
1076     satb_mq_set.set_active_all_threads(false, /* new active value */
1077                                        true /* expected_active */);
1078 
1079     {
1080       GCTraceTime(Debug, gc, phases) trace("Flush Task Caches");
1081       flush_all_task_caches();
1082     }
1083 
1084     {
1085       GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking Before Rebuild");
1086       G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this);
1087       _g1h->heap_region_iterate(&cl);
1088       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1089                                       _g1h->num_regions(), cl.num_selected_for_rebuild());
1090     }
1091 
1092     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UseNextMarking, "Remark after");
1093 
1094     assert(!restart_for_overflow(), "sanity");
1095     // Completely reset the marking state since marking completed
1096     reset_at_marking_complete();
1097   } else {
1098     // We overflowed.  Restart concurrent marking.
1099     _restart_for_overflow = true;
1100 
1101     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");
1102 
1103     // Clear the marking state because we will be restarting
1104     // marking due to overflowing the global mark stack.
1105     reset_marking_for_restart();
1106   }
1107 
1108   {
1109     GCTraceTime(Debug, gc, phases) trace("Report Object Count");
1110     report_object_count();
1111   }
1112 
1113   // Statistics
1114   double now = os::elapsedTime();
1115   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1116   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1117   _remark_times.add((now - start) * 1000.0);
1118 
1119   g1p->record_concurrent_mark_remark_end();
1120 }
1121 
1122 class G1CleanupTask : public AbstractGangTask {
1123   // Per-region work during the Cleanup pause.
1124   class G1CleanupRegionsClosure : public HeapRegionClosure {
1125     G1CollectedHeap* _g1h;
1126     size_t _freed_bytes;
1127     FreeRegionList* _local_cleanup_list;
1128     uint _old_regions_removed;
1129     uint _humongous_regions_removed;
1130     HRRSCleanupTask* _hrrs_cleanup_task;
1131 
1132   public:
1133     G1CleanupRegionsClosure(G1CollectedHeap* g1,
1134                             FreeRegionList* local_cleanup_list,
1135                             HRRSCleanupTask* hrrs_cleanup_task) :
1136       _g1h(g1),
1137       _freed_bytes(0),
1138       _local_cleanup_list(local_cleanup_list),
1139       _old_regions_removed(0),
1140       _humongous_regions_removed(0),
1141       _hrrs_cleanup_task(hrrs_cleanup_task) { }
1142 
1143     size_t freed_bytes() { return _freed_bytes; }
1144     uint old_regions_removed() const { return _old_regions_removed; }
1145     uint humongous_regions_removed() const { return _humongous_regions_removed; }
1146 
1147     bool do_heap_region(HeapRegion *hr) {
1148       hr->note_end_of_marking();
1149 
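           // Regions that are occupied but contain no live data after marking, and are neither
           // young nor archive, are freed immediately; all other regions only get their
           // remembered sets cleaned up.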
1150       if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
1151         _freed_bytes += hr->used();
1152         hr->set_containing_set(NULL);
1153         if (hr->is_humongous()) {
1154           _humongous_regions_removed++;
1155           _g1h->free_humongous_region(hr, _local_cleanup_list);
1156         } else {
1157           _old_regions_removed++;
1158           _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */);
1159         }
1160         hr->clear_cardtable();
1161         _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
1162         log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
1163       } else {
1164         hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1165       }
1166 
1167       return false;
1168     }
1169   };
1170 
1171   G1CollectedHeap* _g1h;
1172   FreeRegionList* _cleanup_list;
1173   HeapRegionClaimer _hrclaimer;
1174 
1175 public:
1176   G1CleanupTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1177     AbstractGangTask("G1 Cleanup"),
1178     _g1h(g1h),
1179     _cleanup_list(cleanup_list),
1180     _hrclaimer(n_workers) {
1181 
1182     HeapRegionRemSet::reset_for_cleanup_tasks();
1183   }
1184 
1185   void work(uint worker_id) {
1186     FreeRegionList local_cleanup_list("Local Cleanup List");
1187     HRRSCleanupTask hrrs_cleanup_task;
1188     G1CleanupRegionsClosure cl(_g1h,
1189                                &local_cleanup_list,
1190                                &hrrs_cleanup_task);
1191     _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
1192     assert(cl.is_complete(), "Shouldn't have aborted!");
1193 
1194     // Now update the old/humongous region sets
1195     _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
1196     {
1197       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1198       _g1h->decrement_summary_bytes(cl.freed_bytes());
1199 
1200       _cleanup_list->add_ordered(&local_cleanup_list);
1201       assert(local_cleanup_list.is_empty(), "post-condition");
1202 
1203       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1204     }
1205   }
1206 };
1207 
1208 void G1ConcurrentMark::reclaim_empty_regions() {
1209   WorkGang* workers = _g1h->workers();
1210   FreeRegionList empty_regions_list("Empty Regions After Mark List");
1211 
1212   G1CleanupTask cl(_g1h, &empty_regions_list, workers->active_workers());
1213   workers->run_task(&cl);
1214 
1215   if (!empty_regions_list.is_empty()) {
1216     log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
1217     // Now print the empty regions list.
1218     G1HRPrinter* hrp = _g1h->hr_printer();
1219     if (hrp->is_active()) {
1220       FreeRegionListIterator iter(&empty_regions_list);
1221       while (iter.more_available()) {
1222         HeapRegion* hr = iter.get_next();
1223         hrp->cleanup(hr);
1224       }
1225     }
1226     // And actually make them available.
1227     _g1h->prepend_to_freelist(&empty_regions_list);
1228   }
1229 }
1230 
1231 void G1ConcurrentMark::cleanup() {
1232   assert_at_safepoint_on_vm_thread();
1233 
1234   // If a full collection has happened, we shouldn't do this.
1235   if (has_aborted()) {
1236     return;
1237   }
1238 
1239   G1Policy* g1p = _g1h->g1_policy();
1240   g1p->record_concurrent_mark_cleanup_start();
1241 
1242   double start = os::elapsedTime();
1243 
1244   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UseNextMarking, "Cleanup before");
1245 
1246   {
1247     GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking After Rebuild");
1248     G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
1249     _g1h->heap_region_iterate(&cl);
1250   }
1251 
1252   if (log_is_enabled(Trace, gc, liveness)) {
1253     G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
1254     _g1h->heap_region_iterate(&cl);
1255   }
1256 
1257   // Install newly created mark bitmap as "prev".
1258   swap_mark_bitmaps();
1259   {
1260     GCTraceTime(Debug, gc, phases) trace("Reclaim Empty Regions");
1261     reclaim_empty_regions();
1262   }
1263 
1264   // Cleanup will have freed any regions completely full of garbage.
1265   // Update the soft reference policy with the new heap occupancy.
1266   Universe::update_heap_info_at_gc();
1267 
1268   // Clean out dead classes and update Metaspace sizes.
1269   if (ClassUnloadingWithConcurrentMark) {
1270     GCTraceTime(Debug, gc, phases) trace("Purge Metaspace");
1271     ClassLoaderDataGraph::purge();
1272   }
1273   MetaspaceGC::compute_new_size();
1274 
1275   // We reclaimed old regions so we should calculate the sizes to make
1276   // sure we update the old gen/space data.
1277   _g1h->g1mm()->update_sizes();
1278 
1279   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");
1280 
1281   // We need to make this be a "collection" so any collection pause that
1282   // races with it goes around and waits for Cleanup to finish.
1283   _g1h->increment_total_collections();
1284 
1285   // Local statistics
1286   double recent_cleanup_time = (os::elapsedTime() - start);
1287   _total_cleanup_time += recent_cleanup_time;
1288   _cleanup_times.add(recent_cleanup_time);
1289 
1290   {
1291     GCTraceTime(Debug, gc, phases) trace("Finalize Concurrent Mark Cleanup");
1292     _g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1293   }
1294 }
1295 
1296 // Supporting Object and Oop closures for reference discovery
1297 // and processing during marking
1298 
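     // An object is considered live if it lies outside the G1 reserved heap, or if it is not
     // "ill" with respect to the current marking information (see is_obj_ill()).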
1299 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1300   HeapWord* addr = (HeapWord*)obj;
1301   return addr != NULL &&
1302          (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj));
1303 }
1304 
1305 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1306 // Uses the G1CMTask associated with a worker thread (for serial reference
1307 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1308 // trace referent objects.
1309 //
1310 // Using the G1CMTask and embedded local queues avoids having the worker
1311 // threads operating on the global mark stack. This reduces the risk
1312 // of overflowing the stack - which we would rather avoid at this late
1313 // stage. Also using the tasks' local queues removes the potential
1314 // of the workers interfering with each other that could occur if
1315 // operating on the global stack.
1316 
1317 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1318   G1ConcurrentMark* _cm;
1319   G1CMTask*         _task;
1320   int               _ref_counter_limit;
1321   int               _ref_counter;
1322   bool              _is_serial;
1323 public:
1324   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1325     _cm(cm), _task(task),
1326     _ref_counter_limit(G1RefProcDrainInterval), _is_serial(is_serial) {
1327     assert(_ref_counter_limit > 0, "sanity");
1328     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1329     _ref_counter = _ref_counter_limit;
1330   }
1331 
1332   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1333   virtual void do_oop(      oop* p) { do_oop_work(p); }
1334 
1335   template <class T> void do_oop_work(T* p) {
1336     if (!_cm->has_overflown()) {
1337       _task->deal_with_reference(p);
1338       _ref_counter--;
1339 
1340       if (_ref_counter == 0) {
1341         // We have dealt with _ref_counter_limit references, pushing them
1342         // and objects reachable from them on to the local stack (and
1343         // possibly the global stack). Call G1CMTask::do_marking_step() to
1344         // process these entries.
1345         //
1346         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1347         // there's nothing more to do (i.e. we're done with the entries that
1348         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1349         // above) or we overflow.
1350         //
1351         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1352         // flag while there may still be some work to do. (See the comment at
1353         // the beginning of G1CMTask::do_marking_step() for those conditions -
1354         // one of which is reaching the specified time target.) It is only
1355         // when G1CMTask::do_marking_step() returns without setting the
1356         // has_aborted() flag that the marking step has completed.
1357         do {
1358           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1359           _task->do_marking_step(mark_step_duration_ms,
1360                                  false      /* do_termination */,
1361                                  _is_serial);
1362         } while (_task->has_aborted() && !_cm->has_overflown());
1363         _ref_counter = _ref_counter_limit;
1364       }
1365     }
1366   }
1367 };
1368 
1369 // 'Drain' oop closure used by both serial and parallel reference processing.
1370 // Uses the G1CMTask associated with a given worker thread (for serial
1371 // reference processing the G1CMtask for worker 0 is used). Calls the
1372 // do_marking_step routine, with an unbelievably large timeout value,
1373 // to drain the marking data structures of the remaining entries
1374 // added by the 'keep alive' oop closure above.
1375 
1376 class G1CMDrainMarkingStackClosure : public VoidClosure {
1377   G1ConcurrentMark* _cm;
1378   G1CMTask*         _task;
1379   bool              _is_serial;
1380  public:
1381   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1382     _cm(cm), _task(task), _is_serial(is_serial) {
1383     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1384   }
1385 
1386   void do_void() {
1387     do {
1388       // We call G1CMTask::do_marking_step() to completely drain the local
1389       // and global marking stacks of entries pushed by the 'keep alive'
1390       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1391       //
1392       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1393       // if there's nothing more to do (i.e. we've completely drained the
1394       // entries that were pushed as a result of applying the 'keep alive'
1395       // closure to the entries on the discovered ref lists) or we overflow
1396       // the global marking stack.
1397       //
1398       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1399       // flag while there may still be some work to do. (See the comment at
1400       // the beginning of G1CMTask::do_marking_step() for those conditions -
1401       // one of which is reaching the specified time target.) It is only
1402       // when G1CMTask::do_marking_step() returns without setting the
1403       // has_aborted() flag that the marking step has completed.
1404 
1405       _task->do_marking_step(1000000000.0 /* something very large */,
1406                              true         /* do_termination */,
1407                              _is_serial);
1408     } while (_task->has_aborted() && !_cm->has_overflown());
1409   }
1410 };
1411 
1412 // Implementation of AbstractRefProcTaskExecutor for parallel
1413 // reference processing at the end of G1 concurrent marking
1414 
1415 class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
1416 private:
1417   G1CollectedHeap*  _g1h;
1418   G1ConcurrentMark* _cm;
1419   WorkGang*         _workers;
1420   uint              _active_workers;
1421 
1422 public:
1423   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1424                           G1ConcurrentMark* cm,
1425                           WorkGang* workers,
1426                           uint n_workers) :
1427     _g1h(g1h), _cm(cm),
1428     _workers(workers), _active_workers(n_workers) { }
1429 
1430   // Executes the given task using concurrent marking worker threads.
1431   virtual void execute(ProcessTask& task);
1432   virtual void execute(EnqueueTask& task);
1433 };
1434 
1435 class G1CMRefProcTaskProxy : public AbstractGangTask {
1436   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1437   ProcessTask&      _proc_task;
1438   G1CollectedHeap*  _g1h;
1439   G1ConcurrentMark* _cm;
1440 
1441 public:
1442   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1443                        G1CollectedHeap* g1h,
1444                        G1ConcurrentMark* cm) :
1445     AbstractGangTask("Process reference objects in parallel"),
1446     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1447     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1448     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1449   }
1450 
1451   virtual void work(uint worker_id) {
1452     ResourceMark rm;
1453     HandleMark hm;
1454     G1CMTask* task = _cm->task(worker_id);
1455     G1CMIsAliveClosure g1_is_alive(_g1h);
1456     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1457     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1458 
1459     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1460   }
1461 };
1462 
1463 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1464   assert(_workers != NULL, "Need parallel worker threads.");
1465   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1466 
1467   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1468 
1469   // We need to reset the concurrency level before each
1470   // proxy task execution, so that the termination protocol
1471   // and overflow handling in G1CMTask::do_marking_step() knows
1472   // how many workers to wait for.
1473   _cm->set_concurrency(_active_workers);
1474   _workers->run_task(&proc_task_proxy);
1475 }
1476 
1477 class G1CMRefEnqueueTaskProxy : public AbstractGangTask {
1478   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1479   EnqueueTask& _enq_task;
1480 
1481 public:
1482   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1483     AbstractGangTask("Enqueue reference objects in parallel"),
1484     _enq_task(enq_task) { }
1485 
1486   virtual void work(uint worker_id) {
1487     _enq_task.work(worker_id);
1488   }
1489 };
1490 
1491 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1492   assert(_workers != NULL, "Need parallel worker threads.");
1493   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1494 
1495   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1496 
1497   // Not strictly necessary but...
1498   //
1499   // We need to reset the concurrency level before each
1500   // proxy task execution, so that the termination protocol
1501   // and overflow handling in G1CMTask::do_marking_step() knows
1502   // how many workers to wait for.
1503   _cm->set_concurrency(_active_workers);
1504   _workers->run_task(&enq_task_proxy);
1505 }
1506 
1507 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
1508   ResourceMark rm;
1509   HandleMark   hm;
1510 
1511   // Is alive closure.
1512   G1CMIsAliveClosure g1_is_alive(_g1h);
1513 
1514   // Inner scope to exclude the cleaning of the string and symbol
1515   // tables from the displayed time.
1516   {
1517     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1518 
1519     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1520 
1521     // See the comment in G1CollectedHeap::ref_processing_init()
1522     // about how reference processing currently works in G1.
1523 
1524     // Set the soft reference policy
1525     rp->setup_policy(clear_all_soft_refs);
1526     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1527 
1528     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1529     // in serial reference processing. Note these closures are also
1530     // used for serially processing (by the current thread) the
1531     // JNI references during parallel reference processing.
1532     //
1533     // These closures do not need to synchronize with the worker
1534     // threads involved in parallel reference processing as these
1535     // instances are executed serially by the current thread (i.e.
1536     // reference processing is not multi-threaded and is thus
1537     // performed by the current thread instead of a gang worker).
1538     //
1539     // The gang tasks involved in parallel reference processing create
1540     // their own instances of these closures, which do their own
1541     // synchronization among themselves.
1542     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1543     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1544 
1545     // We need at least one active thread. If reference processing
1546     // is not multi-threaded we use the current (VMThread) thread,
1547     // otherwise we use the work gang from the G1CollectedHeap and
1548     // we utilize all the worker threads we can.
1549     bool processing_is_mt = rp->processing_is_mt();
1550     uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
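    // Clamp the number of workers to [1, _max_num_tasks]; we cannot use more
    // workers than there are marking tasks.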
1551     active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
1552 
1553     // Parallel processing task executor.
1554     G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
1555                                               _g1h->workers(), active_workers);
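    // When processing is not MT we pass a NULL executor; the reference
    // processor then does the work serially on the current thread using the
    // serial closures above.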
1556     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1557 
1558     // Set the concurrency level. The phase was already set prior to
1559     // executing the remark task.
1560     set_concurrency(active_workers);
1561 
1562     // Set the degree of MT processing here.  If the discovery was done MT,
1563     // the number of threads involved during discovery could differ from
1564     // the number of active workers.  This is OK as long as the discovered
1565     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1566     rp->set_active_mt_degree(active_workers);
1567 
1568     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1569 
1570     // Process the weak references.
1571     const ReferenceProcessorStats& stats =
1572         rp->process_discovered_references(&g1_is_alive,
1573                                           &g1_keep_alive,
1574                                           &g1_drain_mark_stack,
1575                                           executor,
1576                                           &pt);
1577     _gc_tracer_cm->report_gc_reference_stats(stats);
1578     pt.print_all_references();
1579 
1580     // The do_oop work routines of the keep_alive and drain_marking_stack
1581     // oop closures will set the has_overflown flag if we overflow the
1582     // global marking stack.
1583 
1584     assert(has_overflown() || _global_mark_stack.is_empty(),
1585            "Mark stack should be empty (unless it has overflown)");
1586 
1587     assert(rp->num_q() == active_workers, "Number of discovered reference queues should match the active workers");
1588 
1589     rp->enqueue_discovered_references(executor, &pt);
1590 
1591     rp->verify_no_references_recorded();
1592 
1593     pt.print_enqueue_phase();
1594 
1595     assert(!rp->discovery_enabled(), "Post condition");
1596   }
1597 
1598   assert(has_overflown() || _global_mark_stack.is_empty(),
1599          "Mark stack should be empty (unless it has overflown)");
1600 
1601   {
1602     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1603     WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
1604   }
1605 
1606   if (has_overflown()) {
1607     // We cannot trust g1_is_alive if the marking stack overflowed
1608     return;
1609   }
1610 
1611   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1612 
1613   // Unload Klasses, String, Symbols, Code Cache, etc.
1614   if (ClassUnloadingWithConcurrentMark) {
1615     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1616     bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1617     _g1h->complete_cleaning(&g1_is_alive, purged_classes);
1618   } else {
1619     GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1620     // No need to clean the string and symbol tables as they are treated as strong roots when
1621     // class unloading is disabled.
1622     _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1623   }
1624 }
1625 
1626 void G1ConcurrentMark::report_object_count() {
1627   G1CMIsAliveClosure is_alive(_g1h);
1628   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1629 }
1630 
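// Called after marking has completed: the fully built next bitmap becomes the
// prev bitmap, and the old prev bitmap is reused as the next bitmap, to be
// cleared before the next marking cycle starts.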
1631 void G1ConcurrentMark::swap_mark_bitmaps() {
1632   G1CMBitMap* temp = _prev_mark_bitmap;
1633   _prev_mark_bitmap = _next_mark_bitmap;
1634   _next_mark_bitmap = temp;
1635   _g1h->collector_state()->set_clearing_next_bitmap(true);
1636 }
1637 
1638 // Closure for marking entries in SATB buffers.
1639 class G1CMSATBBufferClosure : public SATBBufferClosure {
1640 private:
1641   G1CMTask* _task;
1642   G1CollectedHeap* _g1h;
1643 
1644   // This is very similar to G1CMTask::deal_with_reference, but with
1645   // more relaxed requirements for the argument, so this must be more
1646   // circumspect about treating the argument as an object.
1647   void do_entry(void* entry) const {
1648     _task->increment_refs_reached();
1649     oop const obj = static_cast<oop>(entry);
1650     _task->make_reference_grey(obj);
1651   }
1652 
1653 public:
1654   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1655     : _task(task), _g1h(g1h) { }
1656 
1657   virtual void do_buffer(void** buffer, size_t size) {
1658     for (size_t i = 0; i < size; ++i) {
1659       do_entry(buffer[i]);
1660     }
1661   }
1662 };
1663 
1664 class G1RemarkThreadsClosure : public ThreadClosure {
1665   G1CMSATBBufferClosure _cm_satb_cl;
1666   G1CMOopClosure _cm_cl;
1667   MarkingCodeBlobClosure _code_cl;
1668   int _thread_parity;
1669 
1670  public:
1671   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1672     _cm_satb_cl(task, g1h),
1673     _cm_cl(g1h, task),
1674     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1675     _thread_parity(Threads::thread_claim_parity()) {}
1676 
1677   void do_thread(Thread* thread) {
1678     if (thread->is_Java_thread()) {
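      // claim_oops_do() lets exactly one worker claim each thread for the
      // current claim parity, so no thread's nmethods or SATB buffer are
      // processed twice during remark.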
1679       if (thread->claim_oops_do(true, _thread_parity)) {
1680         JavaThread* jt = (JavaThread*)thread;
1681 
1682         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1683         // however, oops reachable from nmethods have very complex lifecycles:
1684         // * Alive if on the stack of an executing method
1685         // * Weakly reachable otherwise
1686         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1687         // kept live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1688         jt->nmethods_do(&_code_cl);
1689 
1690         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1691       }
1692     } else if (thread->is_VM_thread()) {
1693       if (thread->claim_oops_do(true, _thread_parity)) {
1694         G1BarrierSet::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1695       }
1696     }
1697   }
1698 };
1699 
1700 class G1CMRemarkTask : public AbstractGangTask {
1701   G1ConcurrentMark* _cm;
1702 public:
1703   void work(uint worker_id) {
1704     G1CMTask* task = _cm->task(worker_id);
1705     task->record_start_time();
1706     {
1707       ResourceMark rm;
1708       HandleMark hm;
1709 
1710       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1711       Threads::threads_do(&threads_f);
1712     }
1713 
1714     do {
1715       task->do_marking_step(1000000000.0 /* something very large */,
1716                             true         /* do_termination       */,
1717                             false        /* is_serial            */);
1718     } while (task->has_aborted() && !_cm->has_overflown());
1719     // If we overflow, then we do not want to restart. We instead
1720     // want to abort remark and do concurrent marking again.
1721     task->record_end_time();
1722   }
1723 
1724   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1725     AbstractGangTask("Par Remark"), _cm(cm) {
1726     _cm->terminator()->reset_for_reuse(active_workers);
1727   }
1728 };
1729 
1730 void G1ConcurrentMark::finalize_marking() {
1731   ResourceMark rm;
1732   HandleMark   hm;
1733 
1734   _g1h->ensure_parsability(false);
1735 
1736   // this is remark, so we'll use up all active threads
1737   uint active_workers = _g1h->workers()->active_workers();
1738   set_concurrency_and_phase(active_workers, false /* concurrent */);
1739   // Leave _parallel_marking_threads at its
1740   // value originally calculated in the G1ConcurrentMark
1741   // constructor and pass the number of active workers
1742   // through the gang in the task.
1743 
1744   {
1745     StrongRootsScope srs(active_workers);
1746 
1747     G1CMRemarkTask remarkTask(this, active_workers);
1748     // We will start all available threads, even if we decide that the
1749     // active_workers will be fewer. The extra ones will just bail out
1750     // immediately.
1751     _g1h->workers()->run_task(&remarkTask);
1752   }
1753 
1754   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1755   guarantee(has_overflown() ||
1756             satb_mq_set.completed_buffers_num() == 0,
1757             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1758             BOOL_TO_STR(has_overflown()),
1759             satb_mq_set.completed_buffers_num());
1760 
1761   print_stats();
1762 }
1763 
1764 void G1ConcurrentMark::flush_all_task_caches() {
1765   size_t hits = 0;
1766   size_t misses = 0;
1767   for (uint i = 0; i < _max_num_tasks; i++) {
1768     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1769     hits += stats.first;
1770     misses += stats.second;
1771   }
1772   size_t sum = hits + misses;
1773   log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
1774                        hits, misses, percent_of(hits, sum));
1775 }
1776 
1777 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1778   _prev_mark_bitmap->clear_range(mr);
1779 }
1780 
1781 HeapRegion*
1782 G1ConcurrentMark::claim_region(uint worker_id) {
1783   // "checkpoint" the finger
1784   HeapWord* finger = _finger;
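  // Claiming works by atomically advancing the global finger: read the current
  // finger, compute the end of the region containing it (or one region-size
  // step further if there is no region there), and CAS the finger from the
  // value read to that end. If the CAS succeeds and there was an actual region
  // at the finger, that region is ours; otherwise re-read the finger and retry.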
1785 
1786   while (finger < _heap.end()) {
1787     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1788 
1789     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1790     // Make sure that the reads below do not float before loading curr_region.
1791     OrderAccess::loadload();
1792     // heap_region_containing() above may return NULL as we always claim regions
1793     // up to the end of the heap. In this case, just jump to the next region.
1794     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1795 
1796     // Is the gap between reading the finger and doing the CAS too long?
1797     HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
1798     if (res == finger && curr_region != NULL) {
1799       // we succeeded
1800       HeapWord*   bottom        = curr_region->bottom();
1801       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1802 
1803       // Notice that _finger == end cannot be guaranteed here since
1804       // someone else might have moved the finger even further.
1805       assert(_finger >= end, "the finger should have moved forward");
1806 
1807       if (limit > bottom) {
1808         return curr_region;
1809       } else {
1810         assert(limit == bottom,
1811                "the region limit should be at bottom");
1812         // we return NULL and the caller should try calling
1813         // claim_region() again.
1814         return NULL;
1815       }
1816     } else {
1817       assert(_finger > finger, "the finger should have moved forward");
1818       // read it again
1819       finger = _finger;
1820     }
1821   }
1822 
1823   return NULL;
1824 }
1825 
1826 #ifndef PRODUCT
1827 class VerifyNoCSetOops {
1828   G1CollectedHeap* _g1h;
1829   const char* _phase;
1830   int _info;
1831 
1832 public:
1833   VerifyNoCSetOops(const char* phase, int info = -1) :
1834     _g1h(G1CollectedHeap::heap()),
1835     _phase(phase),
1836     _info(info)
1837   { }
1838 
1839   void operator()(G1TaskQueueEntry task_entry) const {
1840     if (task_entry.is_array_slice()) {
1841       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1842       return;
1843     }
1844     guarantee(oopDesc::is_oop(task_entry.obj()),
1845               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1846               p2i(task_entry.obj()), _phase, _info);
1847     guarantee(!_g1h->is_in_cset(task_entry.obj()),
1848               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1849               p2i(task_entry.obj()), _phase, _info);
1850   }
1851 };
1852 
1853 void G1ConcurrentMark::verify_no_cset_oops() {
1854   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1855   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
1856     return;
1857   }
1858 
1859   // Verify entries on the global mark stack
1860   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1861 
1862   // Verify entries on the task queues
1863   for (uint i = 0; i < _max_num_tasks; ++i) {
1864     G1CMTaskQueue* queue = _task_queues->queue(i);
1865     queue->iterate(VerifyNoCSetOops("Queue", i));
1866   }
1867 
1868   // Verify the global finger
1869   HeapWord* global_finger = finger();
1870   if (global_finger != NULL && global_finger < _heap.end()) {
1871     // Since we always iterate over all regions, we might get a NULL HeapRegion
1872     // here.
1873     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1874     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1875               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1876               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1877   }
1878 
1879   // Verify the task fingers
1880   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1881   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1882     G1CMTask* task = _tasks[i];
1883     HeapWord* task_finger = task->finger();
1884     if (task_finger != NULL && task_finger < _heap.end()) {
1885       // See above note on the global finger verification.
1886       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
1887       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
1888                 !task_hr->in_collection_set(),
1889                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1890                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
1891     }
1892   }
1893 }
1894 #endif // PRODUCT
1895 
1896 void G1ConcurrentMark::rebuild_rem_set_concurrently() {
1897   _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset);
1898 }
1899 
1900 void G1ConcurrentMark::print_stats() {
1901   if (!log_is_enabled(Debug, gc, stats)) {
1902     return;
1903   }
1904   log_debug(gc, stats)("---------------------------------------------------------------------");
1905   for (size_t i = 0; i < _num_active_tasks; ++i) {
1906     _tasks[i]->print_stats();
1907     log_debug(gc, stats)("---------------------------------------------------------------------");
1908   }
1909 }
1910 
1911 void G1ConcurrentMark::concurrent_cycle_abort() {
1912   if (!cm_thread()->during_cycle() || _has_aborted) {
1913     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
1914     return;
1915   }
1916 
1917   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
1918   // concurrent bitmap clearing.
1919   {
1920     GCTraceTime(Debug, gc) debug("Clear Next Bitmap");
1921     clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
1922   }
1923   // Note we cannot clear the previous marking bitmap here
1924   // since VerifyDuringGC verifies the objects marked during
1925   // a full GC against the previous bitmap.
1926 
1927   // Empty mark stack
1928   reset_marking_for_restart();
1929   for (uint i = 0; i < _max_num_tasks; ++i) {
1930     _tasks[i]->clear_region_fields();
1931   }
1932   _first_overflow_barrier_sync.abort();
1933   _second_overflow_barrier_sync.abort();
1934   _has_aborted = true;
1935 
1936   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1937   satb_mq_set.abandon_partial_marking();
1938   // This can be called either during or outside marking, we'll read
1939   // the expected_active value from the SATB queue set.
1940   satb_mq_set.set_active_all_threads(
1941                                  false, /* new active value */
1942                                  satb_mq_set.is_active() /* expected_active */);
1943 }
1944 
1945 static void print_ms_time_info(const char* prefix, const char* name,
1946                                NumberSeq& ns) {
1947   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
1948                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
1949   if (ns.num() > 0) {
1950     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
1951                            prefix, ns.sd(), ns.maximum());
1952   }
1953 }
1954 
1955 void G1ConcurrentMark::print_summary_info() {
1956   Log(gc, marking) log;
1957   if (!log.is_trace()) {
1958     return;
1959   }
1960 
1961   log.trace(" Concurrent marking:");
1962   print_ms_time_info("  ", "init marks", _init_times);
1963   print_ms_time_info("  ", "remarks", _remark_times);
1964   {
1965     print_ms_time_info("     ", "final marks", _remark_mark_times);
1966     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
1967 
1968   }
1969   print_ms_time_info("  ", "cleanups", _cleanup_times);
1970   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
1971             _total_cleanup_time, (_cleanup_times.num() > 0 ? _total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
1972   log.trace("  Total stop_world time = %8.2f s.",
1973             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
1974   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
1975             cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
1976 }
1977 
1978 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
1979   _concurrent_workers->print_worker_threads_on(st);
1980 }
1981 
1982 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
1983   _concurrent_workers->threads_do(tc);
1984 }
1985 
1986 void G1ConcurrentMark::print_on_error(outputStream* st) const {
1987   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
1988                p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
1989   _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
1990   _next_mark_bitmap->print_on_error(st, " Next Bits: ");
1991 }
1992 
1993 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
1994   ReferenceProcessor* result = g1h->ref_processor_cm();
1995   assert(result != NULL, "CM reference processor should not be NULL");
1996   return result;
1997 }
1998 
1999 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2000                                G1CMTask* task)
2001   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2002     _g1h(g1h), _task(task)
2003 { }
2004 
2005 void G1CMTask::setup_for_region(HeapRegion* hr) {
2006   assert(hr != NULL,
2007         "claim_region() should have filtered out NULL regions");
2008   _curr_region  = hr;
2009   _finger       = hr->bottom();
2010   update_region_limit();
2011 }
2012 
2013 void G1CMTask::update_region_limit() {
2014   HeapRegion* hr            = _curr_region;
2015   HeapWord* bottom          = hr->bottom();
2016   HeapWord* limit           = hr->next_top_at_mark_start();
2017 
2018   if (limit == bottom) {
2019     // The region was collected underneath our feet.
2020     // We set the finger to bottom to ensure that the bitmap
2021     // iteration that will follow this will not do anything.
2022     // (this is not a condition that holds when we set the region up,
2023     // as the region is not supposed to be empty in the first place)
2024     _finger = bottom;
2025   } else if (limit >= _region_limit) {
2026     assert(limit >= _finger, "peace of mind");
2027   } else {
2028     assert(limit < _region_limit, "only way to get here");
2029     // This can happen under some pretty unusual circumstances.  An
2030     // evacuation pause empties the region underneath our feet (NTAMS
2031     // at bottom). We then do some allocation in the region (NTAMS
2032     // stays at bottom), followed by the region being used as a GC
2033     // alloc region (NTAMS will move to top() and the objects
2034     // originally below it will be grayed). All objects now marked in
2035     // the region are explicitly grayed, if below the global finger,
2036     // and in fact we do not need to scan anything else. So, we simply
2037     // set _finger to be limit to ensure that the bitmap iteration
2038     // doesn't do anything.
2039     _finger = limit;
2040   }
2041 
2042   _region_limit = limit;
2043 }
2044 
2045 void G1CMTask::giveup_current_region() {
2046   assert(_curr_region != NULL, "invariant");
2047   clear_region_fields();
2048 }
2049 
2050 void G1CMTask::clear_region_fields() {
2051   // Values for these three fields that indicate that we're not
2052   // holding on to a region.
2053   _curr_region   = NULL;
2054   _finger        = NULL;
2055   _region_limit  = NULL;
2056 }
2057 
2058 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2059   if (cm_oop_closure == NULL) {
2060     assert(_cm_oop_closure != NULL, "invariant");
2061   } else {
2062     assert(_cm_oop_closure == NULL, "invariant");
2063   }
2064   _cm_oop_closure = cm_oop_closure;
2065 }
2066 
2067 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2068   guarantee(next_mark_bitmap != NULL, "invariant");
2069   _next_mark_bitmap              = next_mark_bitmap;
2070   clear_region_fields();
2071 
2072   _calls                         = 0;
2073   _elapsed_time_ms               = 0.0;
2074   _termination_time_ms           = 0.0;
2075   _termination_start_time_ms     = 0.0;
2076 
2077   _mark_stats_cache.reset();
2078 }
2079 
2080 bool G1CMTask::should_exit_termination() {
2081   regular_clock_call();
2082   // This is called when we are in the termination protocol. We should
2083   // quit if, for some reason, this task wants to abort or the global
2084   // stack is not empty (this means that we can get work from it).
2085   return !_cm->mark_stack_empty() || has_aborted();
2086 }
2087 
2088 void G1CMTask::reached_limit() {
2089   assert(_words_scanned >= _words_scanned_limit ||
2090          _refs_reached >= _refs_reached_limit ,
2091          "shouldn't have been called otherwise");
2092   regular_clock_call();
2093 }
2094 
2095 void G1CMTask::regular_clock_call() {
2096   if (has_aborted()) {
2097     return;
2098   }
2099 
2100   // First, we need to recalculate the words scanned and refs reached
2101   // limits for the next clock call.
2102   recalculate_limits();
2103 
2104   // During the regular clock call we do the following
2105 
2106   // (1) If an overflow has been flagged, then we abort.
2107   if (_cm->has_overflown()) {
2108     set_has_aborted();
2109     return;
2110   }
2111 
2112   // If we are not concurrent (i.e. we're doing remark) we don't need
2113   // to check anything else. The other steps are only needed during
2114   // the concurrent marking phase.
2115   if (!_cm->concurrent()) {
2116     return;
2117   }
2118 
2119   // (2) If marking has been aborted for Full GC, then we also abort.
2120   if (_cm->has_aborted()) {
2121     set_has_aborted();
2122     return;
2123   }
2124 
2125   double curr_time_ms = os::elapsedVTime() * 1000.0;
2126 
2127   // (3) We check whether we should yield. If we have to, then we abort.
2128   if (SuspendibleThreadSet::should_yield()) {
2129     // We should yield. To do this we abort the task. The caller is
2130     // responsible for yielding.
2131     set_has_aborted();
2132     return;
2133   }
2134 
2135   // (4) We check whether we've reached our time quota. If we have,
2136   // then we abort.
2137   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2138   if (elapsed_time_ms > _time_target_ms) {
2139     set_has_aborted();
2140     _has_timed_out = true;
2141     return;
2142   }
2143 
2144   // (5) Finally, we check whether there are enough completed SATB
2145   // buffers available for processing. If there are, we abort.
2146   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2147   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2148     // We do need to process SATB buffers, so we'll abort and restart
2149     // the marking task to do so.
2150     set_has_aborted();
2151     return;
2152   }
2153 }
2154 
2155 void G1CMTask::recalculate_limits() {
2156   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2157   _words_scanned_limit      = _real_words_scanned_limit;
2158 
2159   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2160   _refs_reached_limit       = _real_refs_reached_limit;
2161 }
2162 
2163 void G1CMTask::decrease_limits() {
2164   // This is called when we believe that we're going to do an infrequent
2165   // operation which will increase the per byte scanned cost (i.e. move
2166   // entries to/from the global stack). It basically tries to decrease the
2167   // scanning limit so that the clock is called earlier.
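  // For example, if words_scanned_period is P, pulling the limit back by
  // 3 * P / 4 leaves only about P / 4 of the scanning budget (measured from
  // the last recalculate_limits() call) before the clock is called again.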
2168 
2169   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2170   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2171 }
2172 
2173 void G1CMTask::move_entries_to_global_stack() {
2174   // Local array where we'll store the entries that will be popped
2175   // from the local queue.
2176   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2177 
2178   size_t n = 0;
2179   G1TaskQueueEntry task_entry;
2180   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2181     buffer[n] = task_entry;
2182     ++n;
2183   }
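  // Terminate a partially filled chunk with a "null" entry so that the
  // consumer (get_entries_from_global_stack()) knows where the valid
  // entries end.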
2184   if (n < G1CMMarkStack::EntriesPerChunk) {
2185     buffer[n] = G1TaskQueueEntry();
2186   }
2187 
2188   if (n > 0) {
2189     if (!_cm->mark_stack_push(buffer)) {
2190       set_has_aborted();
2191     }
2192   }
2193 
2194   // This operation was quite expensive, so decrease the limits.
2195   decrease_limits();
2196 }
2197 
2198 bool G1CMTask::get_entries_from_global_stack() {
2199   // Local array where we'll store the entries that will be popped
2200   // from the global stack.
2201   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2202 
2203   if (!_cm->mark_stack_pop(buffer)) {
2204     return false;
2205   }
2206 
2207   // We did actually pop at least one entry.
2208   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2209     G1TaskQueueEntry task_entry = buffer[i];
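    // A "null" entry terminates a partially filled chunk
    // (see move_entries_to_global_stack()).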
2210     if (task_entry.is_null()) {
2211       break;
2212     }
2213     assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2214     bool success = _task_queue->push(task_entry);
2215     // We only call this when the local queue is empty or under a
2216     // given target limit. So, we do not expect this push to fail.
2217     assert(success, "invariant");
2218   }
2219 
2220   // This operation was quite expensive, so decrease the limits
2221   decrease_limits();
2222   return true;
2223 }
2224 
2225 void G1CMTask::drain_local_queue(bool partially) {
2226   if (has_aborted()) {
2227     return;
2228   }
2229 
2230   // Decide what the target size is, depending on whether we're going to
2231   // drain it partially (so that other tasks can steal if they run out
2232   // of things to do) or totally (at the very end).
2233   size_t target_size;
2234   if (partially) {
2235     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2236   } else {
2237     target_size = 0;
2238   }
2239 
2240   if (_task_queue->size() > target_size) {
2241     G1TaskQueueEntry entry;
2242     bool ret = _task_queue->pop_local(entry);
2243     while (ret) {
2244       scan_task_entry(entry);
2245       if (_task_queue->size() <= target_size || has_aborted()) {
2246         ret = false;
2247       } else {
2248         ret = _task_queue->pop_local(entry);
2249       }
2250     }
2251   }
2252 }
2253 
2254 void G1CMTask::drain_global_stack(bool partially) {
2255   if (has_aborted()) {
2256     return;
2257   }
2258 
2259   // We have a policy to drain the local queue before we attempt to
2260   // drain the global stack.
2261   assert(partially || _task_queue->size() == 0, "invariant");
2262 
2263   // Decide what the target size is, depending on whether we're going to
2264   // drain it partially (so that other tasks can steal if they run out
2265   // of things to do) or totally (at the very end).
2266   // Notice that when draining the global mark stack partially, due to the raciness
2267   // of the mark stack size update we might in fact drop below the target. But
2268   // this is not a problem.
2269   // In case of total draining, we simply process until the global mark stack is
2270   // totally empty, disregarding the size counter.
2271   if (partially) {
2272     size_t const target_size = _cm->partial_mark_stack_size_target();
2273     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2274       if (get_entries_from_global_stack()) {
2275         drain_local_queue(partially);
2276       }
2277     }
2278   } else {
2279     while (!has_aborted() && get_entries_from_global_stack()) {
2280       drain_local_queue(partially);
2281     }
2282   }
2283 }
2284 
2285 // The SATB queue code makes several assumptions about whether to call the par or
2286 // non-par versions of its methods. This is why some of the code is
2287 // replicated. We should really get rid of the single-threaded version
2288 // of the code to simplify things.
2289 void G1CMTask::drain_satb_buffers() {
2290   if (has_aborted()) {
2291     return;
2292   }
2293 
2294   // We set this so that the regular clock knows that we're in the
2295   // middle of draining buffers and doesn't set the abort flag when it
2296   // notices that SATB buffers are available for draining. It'd be
2297   // very counterproductive if it did that. :-)
2298   _draining_satb_buffers = true;
2299 
2300   G1CMSATBBufferClosure satb_cl(this, _g1h);
2301   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2302 
2303   // This keeps claiming and applying the closure to completed buffers
2304   // until we run out of buffers or we need to abort.
2305   while (!has_aborted() &&
2306          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2307     regular_clock_call();
2308   }
2309 
2310   _draining_satb_buffers = false;
2311 
2312   assert(has_aborted() ||
2313          _cm->concurrent() ||
2314          satb_mq_set.completed_buffers_num() == 0, "invariant");
2315 
2316   // Again, this was a potentially expensive operation, so decrease the
2317   // limits to get the regular clock call early.
2318   decrease_limits();
2319 }
2320 
2321 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2322   _mark_stats_cache.reset(region_idx);
2323 }
2324 
2325 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2326   return _mark_stats_cache.evict_all();
2327 }
2328 
2329 void G1CMTask::print_stats() {
2330   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2331   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2332                        _elapsed_time_ms, _termination_time_ms);
2333   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2334                        _step_times_ms.num(),
2335                        _step_times_ms.avg(),
2336                        _step_times_ms.sd(),
2337                        _step_times_ms.maximum(),
2338                        _step_times_ms.sum());
2339   size_t const hits = _mark_stats_cache.hits();
2340   size_t const misses = _mark_stats_cache.misses();
2341   log_debug(gc, stats)("  Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f",
2342                        hits, misses, percent_of(hits, hits + misses));
2343 }
2344 
2345 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2346   return _task_queues->steal(worker_id, hash_seed, task_entry);
2347 }
2348 
2349 /*****************************************************************************
2350 
2351     The do_marking_step(time_target_ms, ...) method is the building
2352     block of the parallel marking framework. It can be called in parallel
2353     with other invocations of do_marking_step() on different tasks
2354     (but only one per task, obviously) and concurrently with the
2355     mutator threads, or during remark, hence it eliminates the need
2356     for two versions of the code. When called during remark, it will
2357     pick up from where the task left off during the concurrent marking
2358     phase. Interestingly, tasks are also claimable during evacuation
2359     pauses, since do_marking_step() ensures that it aborts before
2360     it needs to yield.
2361 
2362     The data structures that it uses to do marking work are the
2363     following:
2364 
2365       (1) Marking Bitmap. If there are gray objects that appear only
2366       on the bitmap (this happens either when dealing with an overflow
2367       or when the initial marking phase has simply marked the roots
2368       and didn't push them on the stack), then tasks claim heap
2369       regions whose bitmap they then scan to find gray objects. A
2370       global finger indicates where the end of the last claimed region
2371       is. A local finger indicates how far into the region a task has
2372       scanned. The two fingers are used to determine how to gray an
2373       object (i.e. whether simply marking it is OK, as it will be
2374       visited by a task in the future, or whether it needs to be also
2375       pushed on a stack).
2376 
2377       (2) Local Queue. The local queue of the task which is accessed
2378       reasonably efficiently by the task. Other tasks can steal from
2379       it when they run out of work. Throughout the marking phase, a
2380       task attempts to keep its local queue short but not totally
2381       empty, so that entries are available for stealing by other
2382       tasks. Only when there is no more work, a task will totally
2383       drain its local queue.
2384 
2385       (3) Global Mark Stack. This handles local queue overflow. During
2386       marking only sets of entries are moved between it and the local
2387       queues, as access to it requires a mutex and more fine-grain
2388       interaction with it which might cause contention. If it
2389       overflows, then the marking phase should restart and iterate
2390       over the bitmap to identify gray objects. Throughout the marking
2391       phase, tasks attempt to keep the global mark stack at a small
2392       length but not totally empty, so that entries are available for
2393       popping by other tasks. Only when there is no more work, tasks
2394       will totally drain the global mark stack.
2395 
2396       (4) SATB Buffer Queue. This is where completed SATB buffers are
2397       made available. Buffers are regularly removed from this queue
2398       and scanned for roots, so that the queue doesn't get too
2399       long. During remark, all completed buffers are processed, as
2400       well as the filled in parts of any uncompleted buffers.
2401 
2402     The do_marking_step() method tries to abort when the time target
2403     has been reached. There are a few other cases when the
2404     do_marking_step() method also aborts:
2405 
2406       (1) When the marking phase has been aborted (after a Full GC).
2407 
2408       (2) When a global overflow (on the global stack) has been
2409       triggered. Before the task aborts, it will actually sync up with
2410       the other tasks to ensure that all the marking data structures
2411       (local queues, stacks, fingers etc.)  are re-initialized so that
2412       when do_marking_step() completes, the marking phase can
2413       immediately restart.
2414 
2415       (3) When enough completed SATB buffers are available. The
2416       do_marking_step() method only tries to drain SATB buffers right
2417       at the beginning. So, if enough buffers are available, the
2418       marking step aborts and the SATB buffers are processed at
2419       the beginning of the next invocation.
2420 
2421       (4) To yield. When we have to yield, we abort and yield
2422       right at the end of do_marking_step(). This saves us from a lot
2423       of hassle as, by yielding, we might allow a Full GC. If this
2424       happens then objects will be compacted underneath our feet, the
2425       heap might shrink, etc. We save checking for this by just
2426       aborting and doing the yield right at the end.
2427 
2428     From the above it follows that the do_marking_step() method should
2429     be called in a loop (or, otherwise, regularly) until it completes.
2430 
2431     If a marking step completes without its has_aborted() flag being
2432     true, it means it has completed the current marking phase (and
2433     also all other marking tasks have done so and have all synced up).
2434 
2435     A method called regular_clock_call() is invoked "regularly" (in
2436     sub ms intervals) throughout marking. It is this clock method that
2437     checks all the abort conditions which were mentioned above and
2438     decides when the task should abort. A work-based scheme is used to
2439     trigger this clock method: when the number of object words the
2440     marking phase has scanned or the number of references the marking
2441     phase has visited reach a given limit. Additional calls to
2442     the clock method have been planted in a few other strategic places
2443     too. The initial reason for the clock method was to avoid calling
2444     vtime too frequently, as it is quite expensive. So, once it was in
2445     place, it was natural to piggy-back all the other conditions on it
2446     too and not constantly check them throughout the code.
2447 
2448     If do_termination is true then do_marking_step will enter its
2449     termination protocol.
2450 
2451     The value of is_serial must be true when do_marking_step is being
2452     called serially (i.e. by the VMThread) and do_marking_step should
2453     skip any synchronization in the termination and overflow code.
2454     Examples include the serial remark code and the serial reference
2455     processing closures.
2456 
2457     The value of is_serial must be false when do_marking_step is
2458     being called by any of the worker threads in a work gang.
2459     Examples include the concurrent marking code (CMMarkingTask),
2460     the MT remark code, and the MT reference processing closures.
2461 
2462  *****************************************************************************/
2463 
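// A minimal sketch (not product code) of the calling pattern described above,
// assuming a G1CMTask* task and a G1ConcurrentMark* cm are available. The
// method is called in a loop until it completes without aborting, just as the
// drain closures and the remark task earlier in this file do:
//
//   do {
//     task->do_marking_step(G1ConcMarkStepDurationMillis,
//                           false /* do_termination */,
//                           false /* is_serial */);
//   } while (task->has_aborted() && !cm->has_overflown());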
2464 void G1CMTask::do_marking_step(double time_target_ms,
2465                                bool do_termination,
2466                                bool is_serial) {
2467   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2468 
2469   _start_time_ms = os::elapsedVTime() * 1000.0;
2470 
2471   // If do_stealing is true then do_marking_step will attempt to
2472   // steal work from the other G1CMTasks. It only makes sense to
2473   // enable stealing when the termination protocol is enabled
2474   // and do_marking_step() is not being called serially.
2475   bool do_stealing = do_termination && !is_serial;
2476 
2477   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2478   _time_target_ms = time_target_ms - diff_prediction_ms;
2479 
2480   // set up the variables that are used in the work-based scheme to
2481   // call the regular clock method
2482   _words_scanned = 0;
2483   _refs_reached  = 0;
2484   recalculate_limits();
2485 
2486   // clear all flags
2487   clear_has_aborted();
2488   _has_timed_out = false;
2489   _draining_satb_buffers = false;
2490 
2491   ++_calls;
2492 
2493   // Set up the bitmap and oop closures. Anything that uses them is
2494   // eventually called from this method, so it is OK to allocate these
2495   // statically.
2496   G1CMBitMapClosure bitmap_closure(this, _cm);
2497   G1CMOopClosure cm_oop_closure(_g1h, this);
2498   set_cm_oop_closure(&cm_oop_closure);
2499 
2500   if (_cm->has_overflown()) {
2501     // This can happen if the mark stack overflows during a GC pause
2502     // and this task, after a yield point, restarts. We have to abort
2503     // as we need to get into the overflow protocol which happens
2504     // right at the end of this task.
2505     set_has_aborted();
2506   }
2507 
2508   // First drain any available SATB buffers. After this, we will not
2509   // look at SATB buffers before the next invocation of this method.
2510   // If enough completed SATB buffers are queued up, the regular clock
2511   // will abort this task so that it restarts.
2512   drain_satb_buffers();
2513   // ...then partially drain the local queue and the global stack
2514   drain_local_queue(true);
2515   drain_global_stack(true);
2516 
2517   do {
2518     if (!has_aborted() && _curr_region != NULL) {
2519       // This means that we're already holding on to a region.
2520       assert(_finger != NULL, "if region is not NULL, then the finger "
2521              "should not be NULL either");
2522 
2523       // We might have restarted this task after an evacuation pause
2524       // which might have evacuated the region we're holding on to
2525       // underneath our feet. Let's read its limit again to make sure
2526       // that we do not iterate over a region of the heap that
2527       // contains garbage (update_region_limit() will also move
2528       // _finger to the start of the region if it is found empty).
2529       update_region_limit();
2530       // We will start from _finger not from the start of the region,
2531       // as we might be restarting this task after aborting half-way
2532       // through scanning this region. In this case, _finger points to
2533       // the address where we last found a marked object. If this is a
2534       // fresh region, _finger points to start().
2535       MemRegion mr = MemRegion(_finger, _region_limit);
2536 
2537       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2538              "humongous regions should go around loop once only");
2539 
2540       // Some special cases:
2541       // If the memory region is empty, we can just give up the region.
2542       // If the current region is humongous then we only need to check
2543       // the bitmap for the bit associated with the start of the object,
2544       // scan the object if it's live, and give up the region.
2545       // Otherwise, let's iterate over the bitmap of the part of the region
2546       // that is left.
2547       // If the iteration is successful, give up the region.
2548       if (mr.is_empty()) {
2549         giveup_current_region();
2550         regular_clock_call();
2551       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2552         if (_next_mark_bitmap->is_marked(mr.start())) {
2553           // The object is marked - apply the closure
2554           bitmap_closure.do_addr(mr.start());
2555         }
2556         // Even if this task aborted while scanning the humongous object
2557         // we can (and should) give up the current region.
2558         giveup_current_region();
2559         regular_clock_call();
2560       } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
2561         giveup_current_region();
2562         regular_clock_call();
2563       } else {
2564         assert(has_aborted(), "currently the only way to do so");
2565         // The only way to abort the bitmap iteration is to return
2566         // false from the do_addr() method. However, inside the
2567         // do_addr() method we move the _finger to point to the
2568         // object currently being looked at. So, if we bail out, we
2569         // have definitely set _finger to something non-null.
2570         assert(_finger != NULL, "invariant");
2571 
2572         // Region iteration was actually aborted. So now _finger
2573         // points to the address of the object we last scanned. If we
2574         // leave it there, when we restart this task, we will rescan
2575         // the object. It is easy to avoid this. We move the finger by
2576         // enough to point to the next possible object header.
2577         assert(_finger < _region_limit, "invariant");
2578         HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2579         // Check if bitmap iteration was aborted while scanning the last object
2580         if (new_finger >= _region_limit) {
2581           giveup_current_region();
2582         } else {
2583           move_finger_to(new_finger);
2584         }
2585       }
2586     }
2587     // At this point we have either completed iterating over the
2588     // region we were holding on to, or we have aborted.
2589 
2590     // We then partially drain the local queue and the global stack.
2591     // (Do we really need this?)
2592     drain_local_queue(true);
2593     drain_global_stack(true);
2594 
2595     // Read the note on the claim_region() method on why it might
2596     // return NULL with potentially more regions available for
2597     // claiming and why we have to check out_of_regions() to determine
2598     // whether we're done or not.
2599     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2600       // We are going to try to claim a new region. We should have
2601       // given up on the previous one.
2602       // Separated the asserts so that we know which one fires.
2603       assert(_curr_region  == NULL, "invariant");
2604       assert(_finger       == NULL, "invariant");
2605       assert(_region_limit == NULL, "invariant");
2606       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2607       if (claimed_region != NULL) {
2608         // Yes, we managed to claim one
2609         setup_for_region(claimed_region);
2610         assert(_curr_region == claimed_region, "invariant");
2611       }
2612       // It is important to call the regular clock here. It might take
2613       // a while to claim a region if, for example, we hit a large
2614       // block of empty regions. So we need to call the regular clock
2615       // method once round the loop to make sure it's called
2616       // frequently enough.
2617       regular_clock_call();
2618     }
2619 
2620     if (!has_aborted() && _curr_region == NULL) {
2621       assert(_cm->out_of_regions(),
2622              "at this point we should be out of regions");
2623     }
2624   } while ( _curr_region != NULL && !has_aborted());
2625 
2626   if (!has_aborted()) {
2627     // We cannot check whether the global stack is empty, since other
2628     // tasks might be pushing objects to it concurrently.
2629     assert(_cm->out_of_regions(),
2630            "at this point we should be out of regions");
2631     // Try to reduce the number of available SATB buffers so that
2632     // remark has less work to do.
2633     drain_satb_buffers();
2634   }
2635 
2636   // Since we've done everything else, we can now totally drain the
2637   // local queue and global stack.
2638   drain_local_queue(false);
2639   drain_global_stack(false);
2640 
2641   // Attempt at work stealing from other task's queues.
2642   if (do_stealing && !has_aborted()) {
2643     // We have not aborted. This means that we have finished all that
2644     // we could. Let's try to do some stealing...
2645 
2646     // We cannot check whether the global stack is empty, since other
2647     // tasks might be pushing objects to it concurrently.
2648     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2649            "only way to reach here");
2650     while (!has_aborted()) {
2651       G1TaskQueueEntry entry;
2652       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2653         scan_task_entry(entry);
2654 
2655         // And since we're towards the end, let's totally drain the
2656         // local queue and global stack.
2657         drain_local_queue(false);
2658         drain_global_stack(false);
2659       } else {
2660         break;
2661       }
2662     }
2663   }
2664 
2665   // We still haven't aborted. Now, let's try to get into the
2666   // termination protocol.
2667   if (do_termination && !has_aborted()) {
2668     // We cannot check whether the global stack is empty, since other
2669     // tasks might be concurrently pushing objects on it.
2670     // Separated the asserts so that we know which one fires.
2671     assert(_cm->out_of_regions(), "only way to reach here");
2672     assert(_task_queue->size() == 0, "only way to reach here");
2673     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2674 
2675     // The G1CMTask class also extends the TerminatorTerminator class,
2676     // hence its should_exit_termination() method will also decide
2677     // whether to exit the termination protocol or not.
2678     bool finished = (is_serial ||
2679                      _cm->terminator()->offer_termination(this));
2680     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2681     _termination_time_ms +=
2682       termination_end_time_ms - _termination_start_time_ms;
2683 
2684     if (finished) {
2685       // We're all done.
2686 
2687       // We can now guarantee that the global stack is empty, since
2688       // all other tasks have finished. We separated the guarantees so
2689       // that, if a condition is false, we can immediately find out
2690       // which one.
2691       guarantee(_cm->out_of_regions(), "only way to reach here");
2692       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2693       guarantee(_task_queue->size() == 0, "only way to reach here");
2694       guarantee(!_cm->has_overflown(), "only way to reach here");
2695     } else {
2696       // Apparently there's more work to do. Let's abort this task. Its
2697       // caller will restart it and we can hopefully find more things to do.
2698       set_has_aborted();
2699     }
2700   }
2701 
2702   // Mainly for debugging purposes to make sure that a pointer to the
2703   // closure which was statically allocated in this frame doesn't
2704   // escape it by accident.
2705   set_cm_oop_closure(NULL);
2706   double end_time_ms = os::elapsedVTime() * 1000.0;
2707   double elapsed_time_ms = end_time_ms - _start_time_ms;
2708   // Update the step history.
2709   _step_times_ms.add(elapsed_time_ms);
2710 
2711   if (has_aborted()) {
2712     // The task was aborted for some reason.
2713     if (_has_timed_out) {
2714       double diff_ms = elapsed_time_ms - _time_target_ms;
2715       // Keep statistics of how well we did with respect to hitting
2716       // our target only if we actually timed out (if we aborted for
2717       // other reasons, then the results might get skewed).
2718       _marking_step_diffs_ms.add(diff_ms);
2719     }
2720 
2721     if (_cm->has_overflown()) {
2722       // This is the interesting one. We aborted because a global
2723       // overflow was raised. This means we have to restart the
2724       // marking phase and start iterating over regions. However, in
2725       // order to do this we have to make sure that all tasks stop
2726       // what they are doing and re-initialize in a safe manner. We
2727       // will achieve this with the use of two barrier sync points.
2728 
2729       if (!is_serial) {
2730         // We only need to enter the sync barrier if we are being called
2731         // from a parallel context.
2732         _cm->enter_first_sync_barrier(_worker_id);
2733 
2734         // When we exit this sync barrier we know that all tasks have
2735         // stopped doing marking work. So, it's now safe to
2736         // re-initialize our data structures.
2737       }
2738 
2739       clear_region_fields();
2740       flush_mark_stats_cache();
2741 
2742       if (!is_serial) {
2743         // If we're executing the concurrent phase of marking, reset the marking
2744         // state; otherwise the marking state is reset after reference processing,
2745         // during the remark pause.
2746         // If we reset here as a result of an overflow during the remark, we will
2747         // see assertion failures from any subsequent set_concurrency_and_phase()
2748         // calls.
2749         if (_cm->concurrent() && _worker_id == 0) {
2750           // Worker 0 is responsible for clearing the global data structures after
2751           // an overflow. During STW we should not clear the overflow flag (in
2752           // G1ConcurrentMark::reset_marking_for_restart()) since we rely on it being true
2753           // when we exit this method to abort the pause and restart concurrent marking.
2754           _cm->reset_marking_for_restart();
2755 
2756           log_info(gc, marking)("Concurrent Mark reset for overflow");
2757         }
2758 
2759         // ...and enter the second barrier.
2760         _cm->enter_second_sync_barrier(_worker_id);
2761       }
2762       // At this point, if we're in the concurrent phase of
2763       // marking, everything has been re-initialized and we're
2764       // ready to restart.
2765     }
2766   }
2767 }
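
     // For reference, an illustrative sketch (not verbatim from any caller; 'task'
     // and 'cm' are placeholder locals) of how a concurrent worker is expected to
     // drive do_marking_step(): repeat bounded steps until either this task or the
     // whole marking cycle aborts. The step duration is assumed to come from
     // G1ConcMarkStepDurationMillis.
     //
     //   do {
     //     task->do_marking_step(G1ConcMarkStepDurationMillis,
     //                           true  /* do_termination */,
     //                           false /* is_serial */);
     //   } while (!cm->has_aborted() && !task->has_aborted());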
2768 
2769 G1CMTask::G1CMTask(uint worker_id,
2770                    G1ConcurrentMark* cm,
2771                    G1CMTaskQueue* task_queue,
2772                    G1RegionMarkStats* mark_stats,
2773                    uint max_regions) :
2774   _objArray_processor(this),
2775   _worker_id(worker_id),
2776   _g1h(G1CollectedHeap::heap()),
2777   _cm(cm),
2778   _next_mark_bitmap(NULL),
2779   _task_queue(task_queue),
2780   _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize),
2781   _calls(0),
2782   _time_target_ms(0.0),
2783   _start_time_ms(0.0),
2784   _cm_oop_closure(NULL),
2785   _curr_region(NULL),
2786   _finger(NULL),
2787   _region_limit(NULL),
2788   _words_scanned(0),
2789   _words_scanned_limit(0),
2790   _real_words_scanned_limit(0),
2791   _refs_reached(0),
2792   _refs_reached_limit(0),
2793   _real_refs_reached_limit(0),
2794   _hash_seed(17),
2795   _has_aborted(false),
2796   _has_timed_out(false),
2797   _draining_satb_buffers(false),
2798   _step_times_ms(),
2799   _elapsed_time_ms(0.0),
2800   _termination_time_ms(0.0),
2801   _termination_start_time_ms(0.0),
2802   _marking_step_diffs_ms()
2803 {
2804   guarantee(task_queue != NULL, "invariant");
2805 
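       // Seed the step-diff statistics with a small value, presumably so that early
       // time-target predictions are not derived from an empty sample set.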
2806   _marking_step_diffs_ms.add(0.5);
2807 }
2808 
2809 // These are formatting macros that are used below to ensure
2810 // consistent output formatting. The *_H_* versions are used to format
2811 // the header for a particular value and should be kept consistent
2812 // with the corresponding value macro. Also note that most of the macros
2813 // add the necessary white space (as a prefix), which makes them a bit
2814 // easier to compose.
2815 
2816 // All the output lines are prefixed with this string to be able to
2817 // identify them easily in a large log file.
2818 #define G1PPRL_LINE_PREFIX            "###"
2819 
2820 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2821 #ifdef _LP64
2822 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2823 #else // _LP64
2824 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2825 #endif // _LP64
2826 
2827 // For per-region info
2828 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2829 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2830 #define G1PPRL_STATE_FORMAT           "   %-5s"
2831 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2832 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
2833 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2834 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
2835 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
2836 
2837 // For summary info
2838 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2839 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
2840 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2841 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
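
     // For illustration only (a sketch, not verbatim output): on an _LP64 build the
     // per-region header assembled below from the *_H_* macros expands to a
     // printf-style format string along the lines of
     //   "###   %4s %37s  %9s  %9s  %9s  %14s  %9s   %5s  %9s"
     // and the matching value line composes the non-header macros in the same order.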
2842 
2843 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2844   _total_used_bytes(0), _total_capacity_bytes(0),
2845   _total_prev_live_bytes(0), _total_next_live_bytes(0),
2846   _total_remset_bytes(0), _total_strong_code_roots_bytes(0)
2847 {
2848   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2849   MemRegion g1_reserved = g1h->g1_reserved();
2850   double now = os::elapsedTime();
2851 
2852   // Print the header of the output.
2853   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
2854   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
2855                           G1PPRL_SUM_ADDR_FORMAT("reserved")
2856                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
2857                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
2858                           HeapRegion::GrainBytes);
2859   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2860   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2861                           G1PPRL_TYPE_H_FORMAT
2862                           G1PPRL_ADDR_BASE_H_FORMAT
2863                           G1PPRL_BYTE_H_FORMAT
2864                           G1PPRL_BYTE_H_FORMAT
2865                           G1PPRL_BYTE_H_FORMAT
2866                           G1PPRL_DOUBLE_H_FORMAT
2867                           G1PPRL_BYTE_H_FORMAT
2868                           G1PPRL_STATE_H_FORMAT
2869                           G1PPRL_BYTE_H_FORMAT,
2870                           "type", "address-range",
2871                           "used", "prev-live", "next-live", "gc-eff",
2872                           "remset", "state", "code-roots");
2873   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2874                           G1PPRL_TYPE_H_FORMAT
2875                           G1PPRL_ADDR_BASE_H_FORMAT
2876                           G1PPRL_BYTE_H_FORMAT
2877                           G1PPRL_BYTE_H_FORMAT
2878                           G1PPRL_BYTE_H_FORMAT
2879                           G1PPRL_DOUBLE_H_FORMAT
2880                           G1PPRL_BYTE_H_FORMAT
2881                           G1PPRL_STATE_H_FORMAT
2882                           G1PPRL_BYTE_H_FORMAT,
2883                           "", "",
2884                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
2885                           "(bytes)", "", "(bytes)");
2886 }
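
     // The header printed above consists of two rows: the column names
     //   type, address-range, used, prev-live, next-live, gc-eff, remset, state, code-roots
     // followed by a units row (bytes for the sizes, bytes/ms for gc-eff), all
     // prefixed with G1PPRL_LINE_PREFIX so the liveness dump is easy to grep for.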
2887 
2888 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) {
2889   const char* type       = r->get_type_str();
2890   HeapWord* bottom       = r->bottom();
2891   HeapWord* end          = r->end();
2892   size_t capacity_bytes  = r->capacity();
2893   size_t used_bytes      = r->used();
2894   size_t prev_live_bytes = r->live_bytes();
2895   size_t next_live_bytes = r->next_live_bytes();
2896   double gc_eff          = r->gc_efficiency();
2897   size_t remset_bytes    = r->rem_set()->mem_size();
2898   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
2899   const char* remset_type = r->rem_set()->get_short_state_str();
2900 
2901   _total_used_bytes      += used_bytes;
2902   _total_capacity_bytes  += capacity_bytes;
2903   _total_prev_live_bytes += prev_live_bytes;
2904   _total_next_live_bytes += next_live_bytes;
2905   _total_remset_bytes    += remset_bytes;
2906   _total_strong_code_roots_bytes += strong_code_roots_bytes;
2907 
2908   // Print a line for this particular region.
2909   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2910                           G1PPRL_TYPE_FORMAT
2911                           G1PPRL_ADDR_BASE_FORMAT
2912                           G1PPRL_BYTE_FORMAT
2913                           G1PPRL_BYTE_FORMAT
2914                           G1PPRL_BYTE_FORMAT
2915                           G1PPRL_DOUBLE_FORMAT
2916                           G1PPRL_BYTE_FORMAT
2917                           G1PPRL_STATE_FORMAT
2918                           G1PPRL_BYTE_FORMAT,
2919                           type, p2i(bottom), p2i(end),
2920                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
2921                           remset_bytes, remset_type, strong_code_roots_bytes);
2922 
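       // Returning false tells the heap-region iteration to keep going, so every
       // region gets a line in the dump.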
2923   return false;
2924 }
2925 
2926 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
2927   // Add the static memory usage (free list and static data) to the remembered set total.
2928   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
2929   // Print the footer of the output.
2930   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2931   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2932                          " SUMMARY"
2933                          G1PPRL_SUM_MB_FORMAT("capacity")
2934                          G1PPRL_SUM_MB_PERC_FORMAT("used")
2935                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
2936                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
2937                          G1PPRL_SUM_MB_FORMAT("remset")
2938                          G1PPRL_SUM_MB_FORMAT("code-roots"),
2939                          bytes_to_mb(_total_capacity_bytes),
2940                          bytes_to_mb(_total_used_bytes),
2941                          percent_of(_total_used_bytes, _total_capacity_bytes),
2942                          bytes_to_mb(_total_prev_live_bytes),
2943                          percent_of(_total_prev_live_bytes, _total_capacity_bytes),
2944                          bytes_to_mb(_total_next_live_bytes),
2945                          percent_of(_total_next_live_bytes, _total_capacity_bytes),
2946                          bytes_to_mb(_total_remset_bytes),
2947                          bytes_to_mb(_total_strong_code_roots_bytes));
2948 }
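
     // For illustration only, the SUMMARY format above composes to roughly
     //   "### SUMMARY  capacity: %1.2f MB  used: %1.2f MB / %1.2f %%"
     //   "  prev-live: %1.2f MB / %1.2f %%  next-live: %1.2f MB / %1.2f %%"
     //   "  remset: %1.2f MB  code-roots: %1.2f MB"
     // i.e. totals in MB, with used, prev-live and next-live also reported as a
     // percentage of the total capacity.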