1 /*
   2  * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorState.hpp"
  32 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  33 #include "gc/g1/g1HeapVerifier.hpp"
  34 #include "gc/g1/g1OopClosures.inline.hpp"
  35 #include "gc/g1/g1CardLiveData.inline.hpp"
  36 #include "gc/g1/g1Policy.hpp"
  37 #include "gc/g1/g1StringDedup.hpp"
  38 #include "gc/g1/heapRegion.inline.hpp"
  39 #include "gc/g1/heapRegionRemSet.hpp"
  40 #include "gc/g1/heapRegionSet.inline.hpp"
  41 #include "gc/shared/adaptiveSizePolicy.hpp"
  42 #include "gc/shared/gcId.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.inline.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/suspendibleThreadSet.hpp"
  50 #include "gc/shared/taskqueue.inline.hpp"
  51 #include "gc/shared/vmGCOperations.hpp"
  52 #include "gc/shared/weakProcessor.hpp"
  53 #include "logging/log.hpp"
  54 #include "memory/allocation.hpp"
  55 #include "memory/resourceArea.hpp"
  56 #include "oops/oop.inline.hpp"
  57 #include "runtime/atomic.hpp"
  58 #include "runtime/handles.inline.hpp"
  59 #include "runtime/java.hpp"
  60 #include "runtime/prefetch.inline.hpp"
  61 #include "services/memTracker.hpp"
  62 #include "utilities/align.hpp"
  63 #include "utilities/growableArray.hpp"
  64 
  65 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  66   assert(addr < _cm->finger(), "invariant");
  67   assert(addr >= _task->finger(), "invariant");
  68 
  69   // We move that task's local finger along.
  70   _task->move_finger_to(addr);
  71 
  72   _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  73   // we only partially drain the local queue and global stack
  74   _task->drain_local_queue(true);
  75   _task->drain_global_stack(true);
  76 
  77   // if the has_aborted flag has been raised, we need to bail out of
  78   // the iteration
  79   return !_task->has_aborted();
  80 }
  81 
  82 G1CMMarkStack::G1CMMarkStack() :
  83   _max_chunk_capacity(0),
  84   _base(NULL),
  85   _chunk_capacity(0) {
  86   set_empty();
  87 }
  88 
  89 bool G1CMMarkStack::resize(size_t new_capacity) {
  90   assert(is_empty(), "Only resize when stack is empty.");
  91   assert(new_capacity <= _max_chunk_capacity,
  92          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
  93 
  94   TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);
  95 
  96   if (new_base == NULL) {
  97     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
  98     return false;
  99   }
 100   // Release old mapping.
 101   if (_base != NULL) {
 102     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 103   }
 104 
 105   _base = new_base;
 106   _chunk_capacity = new_capacity;
 107   set_empty();
 108 
 109   return true;
 110 }
 111 
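     // The alignment for mark stack capacities, expressed in number of task queue
     // entries. It is chosen so that capacities rounded to it correspond to a whole
     // number of chunks and a whole number of OS allocation granules.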
 112 size_t G1CMMarkStack::capacity_alignment() {
 113   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 114 }
 115 
 116 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 117   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 118 
 119   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 120 
 121   _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 122   size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 123 
 124   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 125             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 126             _max_chunk_capacity,
 127             initial_chunk_capacity);
 128 
 129   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 130                 initial_chunk_capacity, _max_chunk_capacity);
 131 
 132   return resize(initial_chunk_capacity);
 133 }
 134 
 135 void G1CMMarkStack::expand() {
 136   if (_chunk_capacity == _max_chunk_capacity) {
 137     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 138     return;
 139   }
 140   size_t old_capacity = _chunk_capacity;
 141   // Double capacity if possible
 142   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 143 
 144   if (resize(new_capacity)) {
 145     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 146                   old_capacity, new_capacity);
 147   } else {
 148     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 149                     old_capacity, new_capacity);
 150   }
 151 }
 152 
 153 G1CMMarkStack::~G1CMMarkStack() {
 154   if (_base != NULL) {
 155     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 156   }
 157 }
 158 
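     // Prepend 'elem' to the given chunk list. Callers are responsible for
     // serializing access to the list (see the locking wrappers below).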
 159 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 160   elem->next = *list;
 161   *list = elem;
 162 }
 163 
 164 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 165   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 166   add_chunk_to_list(&_chunk_list, elem);
 167   _chunks_in_chunk_list++;
 168 }
 169 
 170 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 171   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 172   add_chunk_to_list(&_free_list, elem);
 173 }
 174 
 175 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 176   TaskQueueEntryChunk* result = *list;
 177   if (result != NULL) {
 178     *list = (*list)->next;
 179   }
 180   return result;
 181 }
 182 
 183 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 184   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 185   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 186   if (result != NULL) {
 187     _chunks_in_chunk_list--;
 188   }
 189   return result;
 190 }
 191 
 192 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 193   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 194   return remove_chunk_from_list(&_free_list);
 195 }
 196 
 197 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
 198   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 199   // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 200   // wraparound of _hwm.
 201   if (_hwm >= _chunk_capacity) {
 202     return NULL;
 203   }
 204 
 205   size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
 206   if (cur_idx >= _chunk_capacity) {
 207     return NULL;
 208   }
 209 
 210   TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
 211   result->next = NULL;
 212   return result;
 213 }
 214 
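     // Copy EntriesPerChunk entries from 'ptr_arr' into a chunk and push it onto the
     // global mark stack. Returns false if no chunk could be obtained, i.e. the mark
     // stack overflowed.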
 215 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 216   // Get a new chunk.
 217   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 218 
 219   if (new_chunk == NULL) {
 220     // Did not get a chunk from the free list. Allocate from backing memory.
 221     new_chunk = allocate_new_chunk();
 222 
 223     if (new_chunk == NULL) {
 224       return false;
 225     }
 226   }
 227 
 228   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 229 
 230   add_chunk_to_chunk_list(new_chunk);
 231 
 232   return true;
 233 }
 234 
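     // Pop one chunk from the global mark stack, copying its EntriesPerChunk entries
     // into 'ptr_arr'. Returns false if the mark stack is empty.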
 235 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 236   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 237 
 238   if (cur == NULL) {
 239     return false;
 240   }
 241 
 242   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 243 
 244   add_chunk_to_free_list(cur);
 245   return true;
 246 }
 247 
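     // Reset the mark stack to the empty state. The backing memory is kept for reuse.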
 248 void G1CMMarkStack::set_empty() {
 249   _chunks_in_chunk_list = 0;
 250   _hwm = 0;
 251   _chunk_list = NULL;
 252   _free_list = NULL;
 253 }
 254 
 255 G1CMRootRegions::G1CMRootRegions() :
 256   _cm(NULL), _scan_in_progress(false),
 257   _should_abort(false), _claimed_survivor_index(0) { }
 258 
 259 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 260   _survivors = survivors;
 261   _cm = cm;
 262 }
 263 
 264 void G1CMRootRegions::prepare_for_scan() {
 265   assert(!scan_in_progress(), "pre-condition");
 266 
 267   // Currently, only survivors can be root regions.
 268   _claimed_survivor_index = 0;
 269   _scan_in_progress = _survivors->regions()->is_nonempty();
 270   _should_abort = false;
 271 }
 272 
 273 HeapRegion* G1CMRootRegions::claim_next() {
 274   if (_should_abort) {
 275     // If someone has set the should_abort flag, we return NULL to
 276     // force the caller to bail out of their loop.
 277     return NULL;
 278   }
 279 
 280   // Currently, only survivors can be root regions.
 281   const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
 282 
 283   int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
 284   if (claimed_index < survivor_regions->length()) {
 285     return survivor_regions->at(claimed_index);
 286   }
 287   return NULL;
 288 }
 289 
 290 uint G1CMRootRegions::num_root_regions() const {
 291   return (uint)_survivors->regions()->length();
 292 }
 293 
 294 void G1CMRootRegions::notify_scan_done() {
 295   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 296   _scan_in_progress = false;
 297   RootRegionScan_lock->notify_all();
 298 }
 299 
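     // Cancelling the root region scan simply marks it as finished and notifies any
     // waiting threads.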
 300 void G1CMRootRegions::cancel_scan() {
 301   notify_scan_done();
 302 }
 303 
 304 void G1CMRootRegions::scan_finished() {
 305   assert(scan_in_progress(), "pre-condition");
 306 
 307   // Currently, only survivors can be root regions.
 308   if (!_should_abort) {
 309     assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
 310     assert((uint)_claimed_survivor_index >= _survivors->length(),
 311            "we should have claimed all survivors, claimed index = %u, length = %u",
 312            (uint)_claimed_survivor_index, _survivors->length());
 313   }
 314 
 315   notify_scan_done();
 316 }
 317 
 318 bool G1CMRootRegions::wait_until_scan_finished() {
 319   if (!scan_in_progress()) return false;
 320 
 321   {
 322     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 323     while (scan_in_progress()) {
 324       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 325     }
 326   }
 327   return true;
 328 }
 329 
 330 // Returns the maximum number of workers to be used in a concurrent
 331 // phase based on the number of GC workers being used in a STW
 332 // phase.
 333 static uint scale_concurrent_worker_threads(uint num_gc_workers) {
 334   return MAX2((num_gc_workers + 2) / 4, 1U);
 335 }
 336 
 337 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 338                                    G1RegionToSpaceMapper* prev_bitmap_storage,
 339                                    G1RegionToSpaceMapper* next_bitmap_storage) :
 340   // _cm_thread set inside the constructor
 341   _g1h(g1h),
 342   _completed_initialization(false),
 343 
 344   _cleanup_list("Concurrent Mark Cleanup List"),
 345   _mark_bitmap_1(),
 346   _mark_bitmap_2(),
 347   _prev_mark_bitmap(&_mark_bitmap_1),
 348   _next_mark_bitmap(&_mark_bitmap_2),
 349 
 350   _heap_start(_g1h->reserved_region().start()),
 351   _heap_end(_g1h->reserved_region().end()),
 352 
 353   _root_regions(),
 354 
 355   _global_mark_stack(),
 356 
 357   // _finger set in set_non_marking_state
 358 
 359   _max_num_tasks(ParallelGCThreads),
 360   // _num_active_tasks set in set_non_marking_state()
 361   // _tasks set inside the constructor
 362 
 363   _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
 364   _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),
 365 
 366   _first_overflow_barrier_sync(),
 367   _second_overflow_barrier_sync(),
 368 
 369   _has_overflown(false),
 370   _concurrent(false),
 371   _has_aborted(false),
 372   _restart_for_overflow(false),
 373   _concurrent_marking_in_progress(false),
 374   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 375   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 376 
 377   // _verbose_level set below
 378 
 379   _init_times(),
 380   _remark_times(),
 381   _remark_mark_times(),
 382   _remark_weak_ref_times(),
 383   _cleanup_times(),
 384   _total_counting_time(0.0),
 385   _total_rs_scrub_time(0.0),
 386 
 387   _accum_task_vtime(NULL),
 388 
 389   _concurrent_workers(NULL),
 390   _num_concurrent_workers(0),
 391   _max_concurrent_workers(0)
 392 {
 393   _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 394   _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);
 395 
 396   // Create & start ConcurrentMark thread.
 397   _cm_thread = new ConcurrentMarkThread(this);
 398   if (_cm_thread->osthread() == NULL) {
 399     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 400   }
 401 
 402   assert(CGC_lock != NULL, "CGC_lock must be initialized");
 403 
 404   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 405   satb_qs.set_buffer_size(G1SATBBufferSize);
 406 
 407   _root_regions.init(_g1h->survivor(), this);
 408 
 409   if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
 410     // Calculate the number of concurrent worker threads by scaling
 411     // the number of parallel GC threads.
 412     uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
 413     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 414   }
 415 
 416   assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
 417   if (ConcGCThreads > ParallelGCThreads) {
 418     log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
 419                     ConcGCThreads, ParallelGCThreads);
 420     return;
 421   }
 422 
 423   log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
 424   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 425 
 426   _num_concurrent_workers = ConcGCThreads;
 427   _max_concurrent_workers = _num_concurrent_workers;
 428 
 429   _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
 430   _concurrent_workers->initialize_workers();
 431 
 432   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 433     size_t mark_stack_size =
 434       MIN2(MarkStackSizeMax,
 435           MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
 436     // Verify that the calculated value for MarkStackSize is in range.
 437     // It would be nice to use the private utility routine from Arguments.
 438     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 439       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 440                       "must be between 1 and " SIZE_FORMAT,
 441                       mark_stack_size, MarkStackSizeMax);
 442       return;
 443     }
 444     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 445   } else {
 446     // Verify MarkStackSize is in range.
 447     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 448       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 449         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 450           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 451                           "must be between 1 and " SIZE_FORMAT,
 452                           MarkStackSize, MarkStackSizeMax);
 453           return;
 454         }
 455       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 456         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 457           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 458                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 459                           MarkStackSize, MarkStackSizeMax);
 460           return;
 461         }
 462       }
 463     }
 464   }
 465 
 466   if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
 467     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 468   }
 469 
 470   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 471   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);
 472 
 473   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 474   _num_active_tasks = _max_num_tasks;
 475 
 476   for (uint i = 0; i < _max_num_tasks; ++i) {
 477     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 478     task_queue->initialize();
 479     _task_queues->register_queue(i, task_queue);
 480 
 481     _tasks[i] = new G1CMTask(i, this, task_queue);
 482 
 483     _accum_task_vtime[i] = 0.0;
 484   }
 485 
 486   set_non_marking_state();
 487   _completed_initialization = true;
 488 }
 489 
 490 void G1ConcurrentMark::reset() {
 491   // Starting values for these two. This should be called in a STW
 492   // phase.
 493   MemRegion reserved = _g1h->g1_reserved();
 494   _heap_start = reserved.start();
 495   _heap_end   = reserved.end();
 496 
 497   // Separated the asserts so that we know which one fires.
 498   assert(_heap_start != NULL, "heap bounds should look ok");
 499   assert(_heap_end != NULL, "heap bounds should look ok");
 500   assert(_heap_start < _heap_end, "heap bounds should look ok");
 501 
 502   // Reset all the marking data structures and any necessary flags
 503   reset_marking_state();
 504 
 505   // We reset all of them, since different phases will use a
 506   // different number of active threads. So, it's easiest to have all
 507   // of them ready.
 508   for (uint i = 0; i < _max_num_tasks; ++i) {
 509     _tasks[i]->reset(_next_mark_bitmap);
 510   }
 511 
 512   // we need this to make sure that the flag is on during the evac
 513   // pause with initial mark piggy-backed
 514   set_concurrent_marking_in_progress();
 515 }
 516 
 517 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
 518   assert(SafepointSynchronize::is_at_safepoint(), "May only be called at a safepoint.");
 519 
 520   // Need to clear mark bit of the humongous object if already set and during a marking cycle.
 521   if (_next_mark_bitmap->is_marked(r->bottom())) {
 522     _next_mark_bitmap->clear(r->bottom());
 523   }
 524 }
 525 
 526 void G1ConcurrentMark::reset_marking_state() {
 527   _global_mark_stack.set_empty();
 528 
 529   // Expand the marking stack, if we have to and if we can.
 530   if (has_overflown()) {
 531     _global_mark_stack.expand();
 532   }
 533 
 534   clear_has_overflown();
 535   _finger = _heap_start;
 536 
 537   for (uint i = 0; i < _max_num_tasks; ++i) {
 538     G1CMTaskQueue* queue = _task_queues->queue(i);
 539     queue->set_empty();
 540   }
 541 }
 542 
 543 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 544   assert(active_tasks <= _max_num_tasks, "we should not have more");
 545 
 546   _num_active_tasks = active_tasks;
 547   // Need to update the three data structures below according to the
 548   // number of active threads for this phase.
 549   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 550   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 551   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 552 }
 553 
 554 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 555   set_concurrency(active_tasks);
 556 
 557   _concurrent = concurrent;
 558   // We propagate this to all tasks, not just the active ones.
 559   for (uint i = 0; i < _max_num_tasks; ++i) {
 560     _tasks[i]->set_concurrent(concurrent);
 561   }
 562 
 563   if (concurrent) {
 564     set_concurrent_marking_in_progress();
 565   } else {
 566     // We currently assume that the concurrent flag has been set to
 567     // false before we start remark. At this point we should also be
 568     // in a STW phase.
 569     assert(!concurrent_marking_in_progress(), "invariant");
 570     assert(out_of_regions(),
 571            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 572            p2i(_finger), p2i(_heap_end));
 573   }
 574 }
 575 
 576 void G1ConcurrentMark::set_non_marking_state() {
 577   // We set the global marking state to some default values when we're
 578   // not doing marking.
 579   reset_marking_state();
 580   _num_active_tasks = 0;
 581   clear_concurrent_marking_in_progress();
 582 }
 583 
 584 G1ConcurrentMark::~G1ConcurrentMark() {
 585   // The G1ConcurrentMark instance is never freed.
 586   ShouldNotReachHere();
 587 }
 588 
 589 class G1ClearBitMapTask : public AbstractGangTask {
 590 public:
 591   static size_t chunk_size() { return M; }
 592 
 593 private:
 594   // Heap region closure used for clearing the given mark bitmap.
 595   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 596   private:
 597     G1CMBitMap* _bitmap;
 598     G1ConcurrentMark* _cm;
 599   public:
 600     G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
 601     }
 602 
 603     virtual bool do_heap_region(HeapRegion* r) {
 604       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 605 
 606       HeapWord* cur = r->bottom();
 607       HeapWord* const end = r->end();
 608 
 609       while (cur < end) {
 610         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 611         _bitmap->clear_range(mr);
 612 
 613         cur += chunk_size_in_words;
 614 
 615         // Abort iteration if after yielding the marking has been aborted.
 616         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 617           return true;
 618         }
 619         // Repeat the asserts from before the start of the closure. We will do them
 620         // as asserts here to minimize their overhead on the product. However, we
 621         // will have them as guarantees at the beginning / end of the bitmap
 622         // clearing to get some checking in the product.
 623         assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
 624         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 625       }
 626       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 627 
 628       return false;
 629     }
 630   };
 631 
 632   G1ClearBitmapHRClosure _cl;
 633   HeapRegionClaimer _hr_claimer;
 634   bool _suspendible; // If the task is suspendible, workers must join the STS.
 635 
 636 public:
 637   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 638     AbstractGangTask("G1 Clear Bitmap"),
 639     _cl(bitmap, suspendible ? cm : NULL),
 640     _hr_claimer(n_workers),
 641     _suspendible(suspendible)
 642   { }
 643 
 644   void work(uint worker_id) {
 645     SuspendibleThreadSetJoiner sts_join(_suspendible);
 646     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 647   }
 648 
 649   bool is_complete() {
 650     return _cl.is_complete();
 651   }
 652 };
 653 
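     // Clear the given bitmap using the given WorkGang, splitting the work into
     // fixed-size chunks. If may_yield is true, the workers join the suspendible
     // thread set and clearing may remain incomplete if marking gets aborted.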
 654 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 655   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 656 
 657   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 658   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 659 
 660   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 661 
 662   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 663 
 664   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 665   workers->run_task(&cl, num_workers);
 666   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 667 }
 668 
 669 void G1ConcurrentMark::cleanup_for_next_mark() {
 670   // Make sure that the concurrent mark thread still appears to be in
 671   // the current cycle.
 672   guarantee(cm_thread()->during_cycle(), "invariant");
 673 
 674   // We are finishing up the current cycle by clearing the next
 675   // marking bitmap and getting it ready for the next cycle. During
 676   // this time no other cycle can start. So, let's make sure that this
 677   // is the case.
 678   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 679 
 680   clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);
 681 
 682   // Clear the live count data. If the marking has been aborted, the abort()
 683   // call already did that.
 684   if (!has_aborted()) {
 685     clear_live_data(_concurrent_workers);
 686     DEBUG_ONLY(verify_live_data_clear());
 687   }
 688 
 689   // Repeat the asserts from above.
 690   guarantee(cm_thread()->during_cycle(), "invariant");
 691   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 692 }
 693 
 694 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 695   assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
 696   clear_bitmap(_prev_mark_bitmap, workers, false);
 697 }
 698 
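     // Closure that terminates the heap region iteration as soon as it finds a region
     // with a marked bit, i.e. as soon as the bitmap turns out not to be clear.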
 699 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 700   G1CMBitMap* _bitmap;
 701   bool _error;
 702  public:
 703   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 704   }
 705 
 706   virtual bool do_heap_region(HeapRegion* r) {
 707     // This closure can be called concurrently with the mutator, so we must make sure
 708     // that the result of the get_next_marked_addr() call is compared to the
 709     // value passed to it as the limit to detect any found bits.
 710     // end never changes in G1.
 711     HeapWord* end = r->end();
 712     return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
 713   }
 714 };
 715 
 716 bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
 717   CheckBitmapClearHRClosure cl(_next_mark_bitmap);
 718   _g1h->heap_region_iterate(&cl);
 719   return cl.is_complete();
 720 }
 721 
 722 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 723 public:
 724   bool do_heap_region(HeapRegion* r) {
 725     r->note_start_of_marking();
 726     return false;
 727   }
 728 };
 729 
 730 void G1ConcurrentMark::checkpoint_roots_initial_pre() {
 731   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 732 
 733   _has_aborted = false;
 734 
 735   // Initialize marking structures. This has to be done in a STW phase.
 736   reset();
 737 
 738   // For each region note start of marking.
 739   NoteStartOfMarkHRClosure startcl;
 740   g1h->heap_region_iterate(&startcl);
 741 }
 742 
 743 
 744 void G1ConcurrentMark::checkpoint_roots_initial_post() {
 745   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 746 
 747   // Start Concurrent Marking weak-reference discovery.
 748   ReferenceProcessor* rp = g1h->ref_processor_cm();
 749   // enable ("weak") refs discovery
 750   rp->enable_discovery();
 751   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 752 
 753   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 754   // This is the start of the marking cycle; we expect all
 755   // threads to have SATB queues with active set to false.
 756   satb_mq_set.set_active_all_threads(true, /* new active value */
 757                                      false /* expected_active */);
 758 
 759   _root_regions.prepare_for_scan();
 760 
 761   // update_g1_committed() will be called at the end of an evac pause
 762   // when marking is on. So, it's also called at the end of the
 763   // initial-mark pause to update the heap end, if the heap expands
 764   // during it. No need to call it here.
 765 }
 766 
 767 /*
 768  * Notice that in the next two methods, we actually leave the STS
 769  * during the barrier sync and join it immediately afterwards. If we
 770  * do not do this, the following deadlock can occur: one thread could
 771  * be in the barrier sync code, waiting for the other thread to also
 772  * sync up, whereas another one could be trying to yield, while also
 773  * waiting for the other threads to sync up too.
 774  *
 775  * Note, however, that this code is also used during remark and in
 776  * this case we should not attempt to leave / enter the STS, otherwise
 777  * we'll either hit an assert (debug / fastdebug) or deadlock
 778  * (product). So we should only leave / enter the STS if we are
 779  * operating concurrently.
 780  *
 781  * Because the thread that does the sync barrier has left the STS, it
 782  * is possible for it to be suspended while a Full GC or an evacuation
 783  * pause occurs. This is actually safe, since entering the sync
 784  * barrier is one of the last things do_marking_step() does, and it
 785  * doesn't manipulate any data structures afterwards.
 786  */
 787 
 788 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 789   bool barrier_aborted;
 790   {
 791     SuspendibleThreadSetLeaver sts_leave(concurrent());
 792     barrier_aborted = !_first_overflow_barrier_sync.enter();
 793   }
 794 
 795   // at this point everyone should have synced up and not be doing any
 796   // more work
 797 
 798   if (barrier_aborted) {
 799     // If the barrier aborted we ignore the overflow condition and
 800     // just abort the whole marking phase as quickly as possible.
 801     return;
 802   }
 803 
 804   // If we're executing the concurrent phase of marking, reset the marking
 805   // state; otherwise the marking state is reset after reference processing,
 806   // during the remark pause.
 807   // If we reset here as a result of an overflow during the remark we will
 808   // see assertion failures from any subsequent set_concurrency_and_phase()
 809   // calls.
 810   if (concurrent()) {
 811     // let the task associated with worker 0 do this
 812     if (worker_id == 0) {
 813       // task 0 is responsible for clearing the global data structures
 814       // We should be here because of an overflow. During STW we should
 815       // not clear the overflow flag since we rely on it being true when
 816       // we exit this method to abort the pause and restart concurrent
 817       // marking.
 818       reset_marking_state();
 819 
 820       log_info(gc, marking)("Concurrent Mark reset for overflow");
 821     }
 822   }
 823 
 824   // after this, each task should reset its own data structures and
 825   // then go into the second barrier
 826 }
 827 
 828 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 829   SuspendibleThreadSetLeaver sts_leave(concurrent());
 830   _second_overflow_barrier_sync.enter();
 831 
 832   // at this point everything should be re-initialized and ready to go
 833 }
 834 
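     // Gang task run by the concurrent marking workers. Each worker repeatedly
     // invokes do_marking_step() on its G1CMTask, yielding to the suspendible thread
     // set between steps, until marking completes or is aborted.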
 835 class G1CMConcurrentMarkingTask: public AbstractGangTask {
 836 private:
 837   G1ConcurrentMark*     _cm;
 838   ConcurrentMarkThread* _cmt;
 839 
 840 public:
 841   void work(uint worker_id) {
 842     assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
 843     ResourceMark rm;
 844 
 845     double start_vtime = os::elapsedVTime();
 846 
 847     {
 848       SuspendibleThreadSetJoiner sts_join;
 849 
 850       assert(worker_id < _cm->active_tasks(), "invariant");
 851 
 852       G1CMTask* task = _cm->task(worker_id);
 853       task->record_start_time();
 854       if (!_cm->has_aborted()) {
 855         do {
 856           task->do_marking_step(G1ConcMarkStepDurationMillis,
 857                                 true  /* do_termination */,
 858                                 false /* is_serial*/);
 859 
 860           _cm->do_yield_check();
 861         } while (!_cm->has_aborted() && task->has_aborted());
 862       }
 863       task->record_end_time();
 864       guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
 865     }
 866 
 867     double end_vtime = os::elapsedVTime();
 868     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 869   }
 870 
 871   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
 872                             ConcurrentMarkThread* cmt) :
 873       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 874 
 875   ~G1CMConcurrentMarkingTask() { }
 876 };
 877 
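     // Calculate the number of concurrent marking workers to activate for the next
     // marking phase, honoring dynamic GC thread ergonomics when enabled.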
 878 uint G1ConcurrentMark::calc_active_marking_workers() {
 879   uint result = 0;
 880   if (!UseDynamicNumberOfGCThreads ||
 881       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 882        !ForceDynamicNumberOfGCThreads)) {
 883     result = _max_concurrent_workers;
 884   } else {
 885     result =
 886       AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
 887                                                       1, /* Minimum workers */
 888                                                       _num_concurrent_workers,
 889                                                       Threads::number_of_non_daemon_threads());
 890     // Don't scale the result down by scale_concurrent_worker_threads() because
 891     // that scaling has already gone into "_max_concurrent_workers".
 892   }
 893   assert(result > 0 && result <= _max_concurrent_workers,
 894          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
 895          _max_concurrent_workers, result);
 896   return result;
 897 }
 898 
 899 void G1ConcurrentMark::scan_root_region(HeapRegion* hr) {
 900   // Currently, only survivors can be root regions.
 901   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 902   G1RootRegionScanClosure cl(_g1h, this);
 903 
 904   const uintx interval = PrefetchScanIntervalInBytes;
 905   HeapWord* curr = hr->bottom();
 906   const HeapWord* end = hr->top();
 907   while (curr < end) {
 908     Prefetch::read(curr, interval);
 909     oop obj = oop(curr);
 910     int size = obj->oop_iterate_size(&cl);
 911     assert(size == obj->size(), "sanity");
 912     curr += size;
 913   }
 914 }
 915 
 916 class G1CMRootRegionScanTask : public AbstractGangTask {
 917 private:
 918   G1ConcurrentMark* _cm;
 919 
 920 public:
 921   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 922     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
 923 
 924   void work(uint worker_id) {
 925     assert(Thread::current()->is_ConcurrentGC_thread(),
 926            "this should only be done by a conc GC thread");
 927 
 928     G1CMRootRegions* root_regions = _cm->root_regions();
 929     HeapRegion* hr = root_regions->claim_next();
 930     while (hr != NULL) {
 931       _cm->scan_root_region(hr);
 932       hr = root_regions->claim_next();
 933     }
 934   }
 935 };
 936 
 937 void G1ConcurrentMark::scan_root_regions() {
 938   // scan_in_progress() will have been set to true only if there was
 939   // at least one root region to scan. So, if it's false, we
 940   // should not attempt to do any further work.
 941   if (root_regions()->scan_in_progress()) {
 942     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 943 
 944     _num_concurrent_workers = MIN2(calc_active_marking_workers(),
 945                                    // We distribute work on a per-region basis, so starting
 946                                    // more threads than that is useless.
 947                                    root_regions()->num_root_regions());
 948     assert(_num_concurrent_workers <= _max_concurrent_workers,
 949            "Maximum number of marking threads exceeded");
 950 
 951     G1CMRootRegionScanTask task(this);
 952     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
 953                         task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
 954     _concurrent_workers->run_task(&task, _num_concurrent_workers);
 955 
 956     // It's possible that has_aborted() is true here without actually
 957     // aborting the survivor scan earlier. This is OK as it's
 958     // mainly used for sanity checking.
 959     root_regions()->scan_finished();
 960   }
 961 }
 962 
 963 void G1ConcurrentMark::concurrent_cycle_start() {
 964   _gc_timer_cm->register_gc_start();
 965 
 966   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
 967 
 968   _g1h->trace_heap_before_gc(_gc_tracer_cm);
 969 }
 970 
 971 void G1ConcurrentMark::concurrent_cycle_end() {
 972   _g1h->trace_heap_after_gc(_gc_tracer_cm);
 973 
 974   if (has_aborted()) {
 975     _gc_tracer_cm->report_concurrent_mode_failure();
 976   }
 977 
 978   _gc_timer_cm->register_gc_end();
 979 
 980   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
 981 }
 982 
 983 void G1ConcurrentMark::mark_from_roots() {
 984   // we might be tempted to assert that:
 985   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
 986   //        "inconsistent argument?");
 987   // However that wouldn't be right, because it's possible that
 988   // a safepoint is indeed in progress as a younger generation
 989   // stop-the-world GC happens even as we mark in this generation.
 990 
 991   _restart_for_overflow = false;
 992 
 993   _num_concurrent_workers = calc_active_marking_workers();
 994 
 995   uint active_workers = MAX2(1U, _num_concurrent_workers);
 996 
 997   // Setting active workers is not guaranteed since fewer
 998   // worker threads may currently exist and more may not be
 999   // available.
1000   active_workers = _concurrent_workers->update_active_workers(active_workers);
1001   log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());
1002 
1003   // Parallel task terminator is set in "set_concurrency_and_phase()"
1004   set_concurrency_and_phase(active_workers, true /* concurrent */);
1005 
1006   G1CMConcurrentMarkingTask marking_task(this, cm_thread());
1007   _concurrent_workers->run_task(&marking_task);
1008   print_stats();
1009 }
1010 
1011 void G1ConcurrentMark::checkpoint_roots_final(bool clear_all_soft_refs) {
1012   // world is stopped at this checkpoint
1013   assert(SafepointSynchronize::is_at_safepoint(),
1014          "world should be stopped");
1015 
1016   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1017 
1018   // If a full collection has happened, we shouldn't do this.
1019   if (has_aborted()) {
1020     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1021     return;
1022   }
1023 
1024   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1025 
1026   if (VerifyDuringGC) {
1027     g1h->verifier()->verify(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "During GC (before)");
1028   }
1029   g1h->verifier()->check_bitmaps("Remark Start");
1030 
1031   G1Policy* g1p = g1h->g1_policy();
1032   g1p->record_concurrent_mark_remark_start();
1033 
1034   double start = os::elapsedTime();
1035 
1036   checkpoint_roots_final_work();
1037 
1038   double mark_work_end = os::elapsedTime();
1039 
1040   weak_refs_work(clear_all_soft_refs);
1041 
1042   if (has_overflown()) {
1043     // We overflowed.  Restart concurrent marking.
1044     _restart_for_overflow = true;
1045 
1046     // Verify the heap w.r.t. the previous marking bitmap.
1047     if (VerifyDuringGC) {
1048       g1h->verifier()->verify(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1049     }
1050 
1051     // Clear the marking state because we will be restarting
1052     // marking due to overflowing the global mark stack.
1053     reset_marking_state();
1054   } else {
1055     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1056     // We're done with marking.
1057     // This is the end of the marking cycle; we expect all
1058     // threads to have SATB queues with active set to true.
1059     satb_mq_set.set_active_all_threads(false, /* new active value */
1060                                        true /* expected_active */);
1061 
1062     if (VerifyDuringGC) {
1063       g1h->verifier()->verify(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UseNextMarking, "During GC (after)");
1064     }
1065     g1h->verifier()->check_bitmaps("Remark End");
1066     assert(!restart_for_overflow(), "sanity");
1067     // Completely reset the marking state since marking completed
1068     set_non_marking_state();
1069   }
1070 
1071   // Statistics
1072   double now = os::elapsedTime();
1073   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1074   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1075   _remark_times.add((now - start) * 1000.0);
1076 
1077   g1p->record_concurrent_mark_remark_end();
1078 
1079   G1CMIsAliveClosure is_alive(g1h);
1080   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1081 }
1082 
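     // Closure applied to each heap region at the end of marking. Regions that
     // contain no live data are freed immediately; all other regions have
     // do_cleanup_work() applied to their remembered sets. Freed bytes and removed
     // region counts are accumulated per worker.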
1083 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1084   G1CollectedHeap* _g1;
1085   size_t _freed_bytes;
1086   FreeRegionList* _local_cleanup_list;
1087   uint _old_regions_removed;
1088   uint _humongous_regions_removed;
1089   HRRSCleanupTask* _hrrs_cleanup_task;
1090 
1091 public:
1092   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1093                              FreeRegionList* local_cleanup_list,
1094                              HRRSCleanupTask* hrrs_cleanup_task) :
1095     _g1(g1),
1096     _freed_bytes(0),
1097     _local_cleanup_list(local_cleanup_list),
1098     _old_regions_removed(0),
1099     _humongous_regions_removed(0),
1100     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1101 
1102   size_t freed_bytes() { return _freed_bytes; }
1103   uint old_regions_removed() const { return _old_regions_removed; }
1104   uint humongous_regions_removed() const { return _humongous_regions_removed; }
1105 
1106   bool do_heap_region(HeapRegion *hr) {
1107     _g1->reset_gc_time_stamps(hr);
1108     hr->note_end_of_marking();
1109 
1110     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
1111       _freed_bytes += hr->used();
1112       hr->set_containing_set(NULL);
1113       if (hr->is_humongous()) {
1114         _humongous_regions_removed++;
1115         _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
1116       } else {
1117         _old_regions_removed++;
1118         _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
1119       }
1120     } else {
1121       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1122     }
1123 
1124     return false;
1125   }
1126 };
1127 
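     // Gang task that applies G1NoteEndOfConcMarkClosure to the heap regions in
     // parallel and merges each worker's freed regions and statistics into the
     // global state under a lock.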
1128 class G1ParNoteEndTask: public AbstractGangTask {
1129   friend class G1NoteEndOfConcMarkClosure;
1130 
1131 protected:
1132   G1CollectedHeap* _g1h;
1133   FreeRegionList* _cleanup_list;
1134   HeapRegionClaimer _hrclaimer;
1135 
1136 public:
1137   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1138       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1139   }
1140 
1141   void work(uint worker_id) {
1142     FreeRegionList local_cleanup_list("Local Cleanup List");
1143     HRRSCleanupTask hrrs_cleanup_task;
1144     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1145                                            &hrrs_cleanup_task);
1146     _g1h->heap_region_par_iterate_from_worker_offset(&g1_note_end, &_hrclaimer, worker_id);
1147     assert(g1_note_end.is_complete(), "Shouldn't have yielded!");
1148 
1149     // Now update the lists
1150     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1151     {
1152       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1153       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1154 
1155       // If we iterate over the global cleanup list at the end of
1156       // cleanup to do this printing, we cannot guarantee that we only
1157       // generate output for the newly-reclaimed regions (the list
1158       // might not be empty at the beginning of cleanup; we might
1159       // still be working on its previous contents). So we do the
1160       // printing here, before we append the new regions to the global
1161       // cleanup list.
1162 
1163       G1HRPrinter* hr_printer = _g1h->hr_printer();
1164       if (hr_printer->is_active()) {
1165         FreeRegionListIterator iter(&local_cleanup_list);
1166         while (iter.more_available()) {
1167           HeapRegion* hr = iter.get_next();
1168           hr_printer->cleanup(hr);
1169         }
1170       }
1171 
1172       _cleanup_list->add_ordered(&local_cleanup_list);
1173       assert(local_cleanup_list.is_empty(), "post-condition");
1174 
1175       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1176     }
1177   }
1178 };
1179 
1180 void G1ConcurrentMark::cleanup() {
1181   // world is stopped at this checkpoint
1182   assert(SafepointSynchronize::is_at_safepoint(),
1183          "world should be stopped");
1184   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1185 
1186   // If a full collection has happened, we shouldn't do this.
1187   if (has_aborted()) {
1188     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1189     return;
1190   }
1191 
1192   g1h->verifier()->verify_region_sets_optional();
1193 
1194   if (VerifyDuringGC) {
1195     g1h->verifier()->verify(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "During GC (before)");
1196   }
1197   g1h->verifier()->check_bitmaps("Cleanup Start");
1198 
1199   G1Policy* g1p = g1h->g1_policy();
1200   g1p->record_concurrent_mark_cleanup_start();
1201 
1202   double start = os::elapsedTime();
1203 
1204   HeapRegionRemSet::reset_for_cleanup_tasks();
1205 
1206   {
1207     GCTraceTime(Debug, gc)("Finalize Live Data");
1208     finalize_live_data();
1209   }
1210 
1211   if (VerifyDuringGC) {
1212     GCTraceTime(Debug, gc)("Verify Live Data");
1213     verify_live_data();
1214   }
1215 
1216   g1h->collector_state()->set_mark_in_progress(false);
1217 
1218   double count_end = os::elapsedTime();
1219   double this_final_counting_time = (count_end - start);
1220   _total_counting_time += this_final_counting_time;
1221 
1222   if (log_is_enabled(Trace, gc, liveness)) {
1223     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1224     _g1h->heap_region_iterate(&cl);
1225   }
1226 
1227   // Install newly created mark bitmap as "prev".
1228   swap_mark_bitmaps();
1229 
1230   g1h->reset_gc_time_stamp();
1231 
1232   uint n_workers = _g1h->workers()->active_workers();
1233 
1234   // Note end of marking in all heap regions.
1235   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1236   g1h->workers()->run_task(&g1_par_note_end_task);
1237   g1h->check_gc_time_stamps();
1238 
1239   if (!cleanup_list_is_empty()) {
1240     // The cleanup list is not empty, so we'll have to process it
1241     // concurrently. Notify anyone else that might be wanting free
1242     // regions that there will be more free regions coming soon.
1243     g1h->set_free_regions_coming();
1244   }
1245 
1246   // Remembered set scrubbing must be done before the record_concurrent_mark_cleanup_end()
1247   // call below, since it affects the metric by which we sort the heap regions.
1248   if (G1ScrubRemSets) {
1249     double rs_scrub_start = os::elapsedTime();
1250     g1h->scrub_rem_set();
1251     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1252   }
1253 
1254   // this will also free any regions totally full of garbage objects,
1255   // and sort the regions.
1256   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1257 
1258   // Statistics.
1259   double end = os::elapsedTime();
1260   _cleanup_times.add((end - start) * 1000.0);
1261 
1262   // Clean up will have freed any regions completely full of garbage.
1263   // Update the soft reference policy with the new heap occupancy.
1264   Universe::update_heap_info_at_gc();
1265 
1266   if (VerifyDuringGC) {
1267     g1h->verifier()->verify(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "During GC (after)");
1268   }
1269 
1270   g1h->verifier()->check_bitmaps("Cleanup End");
1271 
1272   g1h->verifier()->verify_region_sets_optional();
1273 
1274   // We need to make this count as a "collection" so that any collection pause that
1275   // races with it goes around and waits for complete_cleanup() to finish.
1276   g1h->increment_total_collections();
1277 
1278   // Clean out dead classes and update Metaspace sizes.
1279   if (ClassUnloadingWithConcurrentMark) {
1280     ClassLoaderDataGraph::purge();
1281   }
1282   MetaspaceGC::compute_new_size();
1283 
1284   // We reclaimed old regions so we should calculate the sizes to make
1285   // sure we update the old gen/space data.
1286   g1h->g1mm()->update_sizes();
1287 }
1288 
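     // Executed concurrently after the Cleanup pause: clears the regions on the
     // cleanup list and appends them to the secondary free list in batches,
     // notifying any waiters.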
1289 void G1ConcurrentMark::complete_cleanup() {
1290   if (has_aborted()) return;
1291 
1292   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1293 
1294   _cleanup_list.verify_optional();
1295   FreeRegionList tmp_free_list("Tmp Free List");
1296 
1297   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1298                                   "cleanup list has %u entries",
1299                                   _cleanup_list.length());
1300 
1301   // No one else should be accessing the _cleanup_list at this point,
1302   // so it is not necessary to take any locks
1303   while (!_cleanup_list.is_empty()) {
1304     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1305     assert(hr != NULL, "Got NULL from a non-empty list");
1306     hr->par_clear();
1307     tmp_free_list.add_ordered(hr);
1308 
1309     // Instead of adding one region at a time to the secondary_free_list,
1310     // we accumulate them in the local list and move them a few at a
1311     // time. This also cuts down on the number of notify_all() calls
1312     // we do during this process. We'll also append the local list when
1313     // _cleanup_list is empty (which means we just removed the last
1314     // region from the _cleanup_list).
1315     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1316         _cleanup_list.is_empty()) {
1317       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1318                                       "appending %u entries to the secondary_free_list, "
1319                                       "cleanup list still has %u entries",
1320                                       tmp_free_list.length(),
1321                                       _cleanup_list.length());
1322 
1323       {
1324         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1325         g1h->secondary_free_list_add(&tmp_free_list);
1326         SecondaryFreeList_lock->notify_all();
1327       }
1328 #ifndef PRODUCT
1329       if (G1StressConcRegionFreeing) {
1330         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1331           os::sleep(Thread::current(), (jlong) 1, false);
1332         }
1333       }
1334 #endif
1335     }
1336   }
1337   assert(tmp_free_list.is_empty(), "post-condition");
1338 }
1339 
1340 // Supporting Object and Oop closures for reference discovery
1341 // and processing during marking
1342 
1343 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1344   HeapWord* addr = (HeapWord*)obj;
1345   return addr != NULL &&
1346          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1347 }
1348 
1349 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1350 // Uses the G1CMTask associated with a worker thread (for serial reference
1351 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1352 // trace referent objects.
1353 //
1354 // Using the G1CMTask and embedded local queues avoids having the worker
1355 // threads operating on the global mark stack. This reduces the risk
1356 // of overflowing the stack - which we would rather avoid at this late
1357 // stage. Also, using the tasks' local queues removes the potential
1358 // for the workers to interfere with each other, which could occur if
1359 // they operated on the global stack.
1360 
1361 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1362   G1ConcurrentMark* _cm;
1363   G1CMTask*         _task;
1364   int               _ref_counter_limit;
1365   int               _ref_counter;
1366   bool              _is_serial;
1367  public:
1368   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1369     _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1370     _is_serial(is_serial) {
1371     assert(_ref_counter_limit > 0, "sanity");
1372     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1373     _ref_counter = _ref_counter_limit;
1374   }
1375 
1376   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1377   virtual void do_oop(      oop* p) { do_oop_work(p); }
1378 
1379   template <class T> void do_oop_work(T* p) {
1380     if (!_cm->has_overflown()) {
1381       oop obj = oopDesc::load_decode_heap_oop(p);
1382       _task->deal_with_reference(obj);
1383       _ref_counter--;
1384 
1385       if (_ref_counter == 0) {
1386         // We have dealt with _ref_counter_limit references, pushing them
1387         // and objects reachable from them on to the local stack (and
1388         // possibly the global stack). Call G1CMTask::do_marking_step() to
1389         // process these entries.
1390         //
1391         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1392         // there's nothing more to do (i.e. we're done with the entries that
1393         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1394         // above) or we overflow.
1395         //
1396         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1397         // flag while there may still be some work to do. (See the comment at
1398         // the beginning of G1CMTask::do_marking_step() for those conditions -
1399         // one of which is reaching the specified time target.) It is only
1400         // when G1CMTask::do_marking_step() returns without setting the
1401         // has_aborted() flag that the marking step has completed.
1402         do {
1403           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1404           _task->do_marking_step(mark_step_duration_ms,
1405                                  false      /* do_termination */,
1406                                  _is_serial);
1407         } while (_task->has_aborted() && !_cm->has_overflown());
1408         _ref_counter = _ref_counter_limit;
1409       }
1410     }
1411   }
1412 };
1413 
1414 // 'Drain' oop closure used by both serial and parallel reference processing.
1415 // Uses the G1CMTask associated with a given worker thread (for serial
1416 // reference processing the G1CMTask for worker 0 is used). Calls the
1417 // do_marking_step routine, with an unbelievably large timeout value,
1418 // to drain the marking data structures of the remaining entries
1419 // added by the 'keep alive' oop closure above.
1420 
1421 class G1CMDrainMarkingStackClosure: public VoidClosure {
1422   G1ConcurrentMark* _cm;
1423   G1CMTask*         _task;
1424   bool              _is_serial;
1425  public:
1426   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1427     _cm(cm), _task(task), _is_serial(is_serial) {
1428     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1429   }
1430 
1431   void do_void() {
1432     do {
1433       // We call G1CMTask::do_marking_step() to completely drain the local
1434       // and global marking stacks of entries pushed by the 'keep alive'
1435       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1436       //
1437       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1438       // if there's nothing more to do (i.e. we've completely drained the
1439       // entries that were pushed as a result of applying the 'keep alive'
1440       // closure to the entries on the discovered ref lists) or we overflow
1441       // the global marking stack.
1442       //
1443       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1444       // flag while there may still be some work to do. (See the comment at
1445       // the beginning of G1CMTask::do_marking_step() for those conditions -
1446       // one of which is reaching the specified time target.) It is only
1447       // when G1CMTask::do_marking_step() returns without setting the
1448       // has_aborted() flag that the marking step has completed.
1449 
1450       _task->do_marking_step(1000000000.0 /* something very large */,
1451                              true         /* do_termination */,
1452                              _is_serial);
1453     } while (_task->has_aborted() && !_cm->has_overflown());
1454   }
1455 };
1456 
1457 // Implementation of AbstractRefProcTaskExecutor for parallel
1458 // reference processing at the end of G1 concurrent marking
1459 
1460 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1461 private:
1462   G1CollectedHeap*  _g1h;
1463   G1ConcurrentMark* _cm;
1464   WorkGang*         _workers;
1465   uint              _active_workers;
1466 
1467 public:
1468   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1469                           G1ConcurrentMark* cm,
1470                           WorkGang* workers,
1471                           uint n_workers) :
1472     _g1h(g1h), _cm(cm),
1473     _workers(workers), _active_workers(n_workers) { }
1474 
1475   // Executes the given task using concurrent marking worker threads.
1476   virtual void execute(ProcessTask& task);
1477   virtual void execute(EnqueueTask& task);
1478 };
1479 
1480 class G1CMRefProcTaskProxy: public AbstractGangTask {
1481   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1482   ProcessTask&      _proc_task;
1483   G1CollectedHeap*  _g1h;
1484   G1ConcurrentMark* _cm;
1485 
1486 public:
1487   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1488                        G1CollectedHeap* g1h,
1489                        G1ConcurrentMark* cm) :
1490     AbstractGangTask("Process reference objects in parallel"),
1491     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1492     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1493     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1494   }
1495 
1496   virtual void work(uint worker_id) {
1497     ResourceMark rm;
1498     HandleMark hm;
1499     G1CMTask* task = _cm->task(worker_id);
1500     G1CMIsAliveClosure g1_is_alive(_g1h);
1501     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1502     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1503 
1504     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1505   }
1506 };
1507 
1508 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1509   assert(_workers != NULL, "Need parallel worker threads.");
1510   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1511 
1512   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1513 
1514   // We need to reset the concurrency level before each
1515   // proxy task execution, so that the termination protocol
1516   // and overflow handling in G1CMTask::do_marking_step() know
1517   // how many workers to wait for.
1518   _cm->set_concurrency(_active_workers);
1519   _workers->run_task(&proc_task_proxy);
1520 }
1521 
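     // Gang task proxy that simply runs the given reference enqueue task with the
     // executing worker's id.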
1522 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1523   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1524   EnqueueTask& _enq_task;
1525 
1526 public:
1527   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1528     AbstractGangTask("Enqueue reference objects in parallel"),
1529     _enq_task(enq_task) { }
1530 
1531   virtual void work(uint worker_id) {
1532     _enq_task.work(worker_id);
1533   }
1534 };
1535 
1536 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1537   assert(_workers != NULL, "Need parallel worker threads.");
1538   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1539 
1540   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1541 
1542   // Not strictly necessary but...
1543   //
1544   // We need to reset the concurrency level before each
1545   // proxy task execution, so that the termination protocol
1546   // and overflow handling in G1CMTask::do_marking_step() know
1547   // how many workers to wait for.
1548   _cm->set_concurrency(_active_workers);
1549   _workers->run_task(&enq_task_proxy);
1550 }
1551 
1552 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
1553   if (has_overflown()) {
1554     // Skip processing the discovered references if we have
1555     // overflown the global marking stack. Reference objects
1556     // only get discovered once so it is OK to not
1557     // de-populate the discovered reference lists. We could have,
1558     // but the only benefit would be that, when marking restarts,
1559     // fewer reference objects are discovered.
1560     return;
1561   }
1562 
1563   ResourceMark rm;
1564   HandleMark   hm;
1565 
1566   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1567 
1568   // Is alive closure.
1569   G1CMIsAliveClosure g1_is_alive(g1h);
1570 
1571   // Inner scope to exclude the cleaning of the string and symbol
1572   // tables from the displayed time.
1573   {
1574     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1575 
1576     ReferenceProcessor* rp = g1h->ref_processor_cm();
1577 
1578     // See the comment in G1CollectedHeap::ref_processing_init()
1579     // about how reference processing currently works in G1.
1580 
1581     // Set the soft reference policy
1582     rp->setup_policy(clear_all_soft_refs);
1583     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1584 
1585     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1586     // in serial reference processing. Note these closures are also
1587     // used for serially processing (by the current thread) the
1588     // JNI references during parallel reference processing.
1589     //
1590     // These closures do not need to synchronize with the worker
1591     // threads involved in parallel reference processing as these
1592     // instances are executed serially by the current thread (e.g.
1593     // reference processing is not multi-threaded and is thus
1594     // performed by the current thread instead of a gang worker).
1595     //
1596     // The gang tasks involved in parallel reference processing create
1597     // their own instances of these closures, which do their own
1598     // synchronization among themselves.
1599     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1600     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1601 
1602     // We need at least one active thread. If reference processing
1603     // is not multi-threaded we use the current (VMThread) thread,
1604     // otherwise we use the work gang from the G1CollectedHeap and
1605     // we utilize all the worker threads we can.
1606     bool processing_is_mt = rp->processing_is_mt();
1607     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
1608     active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
1609 
1610     // Parallel processing task executor.
1611     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
1612                                               g1h->workers(), active_workers);
1613     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1614 
1615     // Set the concurrency level. The phase was already set prior to
1616     // executing the remark task.
1617     set_concurrency(active_workers);
1618 
1619     // Set the degree of MT processing here.  If the discovery was done MT,
1620     // the number of threads involved during discovery could differ from
1621     // the number of active workers.  This is OK as long as the discovered
1622     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1623     rp->set_active_mt_degree(active_workers);
1624 
1625     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1626 
1627     // Process the weak references.
1628     const ReferenceProcessorStats& stats =
1629         rp->process_discovered_references(&g1_is_alive,
1630                                           &g1_keep_alive,
1631                                           &g1_drain_mark_stack,
1632                                           executor,
1633                                           &pt);
1634     _gc_tracer_cm->report_gc_reference_stats(stats);
1635     pt.print_all_references();
1636 
1637     // The do_oop work routines of the keep_alive and drain_marking_stack
1638     // oop closures will set the has_overflown flag if we overflow the
1639     // global marking stack.
1640 
1641     assert(has_overflown() || _global_mark_stack.is_empty(),
1642             "Mark stack should be empty (unless it has overflown)");
1643 
1644     assert(rp->num_q() == active_workers, "why not");
1645 
1646     rp->enqueue_discovered_references(executor, &pt);
1647 
1648     rp->verify_no_references_recorded();
1649 
1650     pt.print_enqueue_phase();
1651 
1652     assert(!rp->discovery_enabled(), "Post condition");
1653   }
1654 
1655   assert(has_overflown() || _global_mark_stack.is_empty(),
1656           "Mark stack should be empty (unless it has overflown)");
1657 
1658   {
1659     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1660     WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
1661   }
1662 
1663   if (has_overflown()) {
1664     // We can not trust g1_is_alive if the marking stack overflowed
1665     return;
1666   }
1667 
1668   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1669 
1670   // Unload Klasses, String, Symbols, Code Cache, etc.
1671   if (ClassUnloadingWithConcurrentMark) {
1672     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1673     bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1674     g1h->complete_cleaning(&g1_is_alive, purged_classes);
1675   } else {
1676     GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1677     // No need to clean string table and symbol table as they are treated as strong roots when
1678     // class unloading is disabled.
1679     g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1680 
1681   }
1682 }
1683 
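     // After marking completes, the "next" bitmap holds the up-to-date liveness
     // information and becomes the "prev" bitmap; the old "prev" bitmap becomes
     // the "next" bitmap and is cleared before the following marking cycle.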
1684 void G1ConcurrentMark::swap_mark_bitmaps() {
1685   G1CMBitMap* temp = _prev_mark_bitmap;
1686   _prev_mark_bitmap = _next_mark_bitmap;
1687   _next_mark_bitmap = temp;
1688 }
1689 
1690 // Closure for marking entries in SATB buffers.
1691 class G1CMSATBBufferClosure : public SATBBufferClosure {
1692 private:
1693   G1CMTask* _task;
1694   G1CollectedHeap* _g1h;
1695 
1696   // This is very similar to G1CMTask::deal_with_reference, but with
1697   // more relaxed requirements for the argument, so this must be more
1698   // circumspect about treating the argument as an object.
1699   void do_entry(void* entry) const {
1700     _task->increment_refs_reached();
1701     oop const obj = static_cast<oop>(entry);
1702     _task->make_reference_grey(obj);
1703   }
1704 
1705 public:
1706   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1707     : _task(task), _g1h(g1h) { }
1708 
1709   virtual void do_buffer(void** buffer, size_t size) {
1710     for (size_t i = 0; i < size; ++i) {
1711       do_entry(buffer[i]);
1712     }
1713   }
1714 };
1715 
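     // Thread closure used during remark to flush per-thread marking work into the
     // given G1CMTask: nmethod roots found on Java thread stacks and the entries
     // remaining in the threads' SATB buffers.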
1716 class G1RemarkThreadsClosure : public ThreadClosure {
1717   G1CMSATBBufferClosure _cm_satb_cl;
1718   G1CMOopClosure _cm_cl;
1719   MarkingCodeBlobClosure _code_cl;
1720   int _thread_parity;
1721 
1722  public:
1723   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1724     _cm_satb_cl(task, g1h),
1725     _cm_cl(g1h, g1h->concurrent_mark(), task),
1726     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1727     _thread_parity(Threads::thread_claim_parity()) {}
1728 
1729   void do_thread(Thread* thread) {
1730     if (thread->is_Java_thread()) {
1731       if (thread->claim_oops_do(true, _thread_parity)) {
1732         JavaThread* jt = (JavaThread*)thread;
1733 
1734         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking.
1735         // However, oops reachable from nmethods have very complex lifecycles:
1736         // * Alive if on the stack of an executing method
1737         // * Weakly reachable otherwise
1738         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1739         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1740         jt->nmethods_do(&_code_cl);
1741 
1742         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1743       }
1744     } else if (thread->is_VM_thread()) {
1745       if (thread->claim_oops_do(true, _thread_parity)) {
1746         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1747       }
1748     }
1749   }
1750 };
1751 
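     // Parallel remark task. Each worker first flushes the per-thread marking state
     // (via G1RemarkThreadsClosure) and then calls do_marking_step() with an
     // effectively unlimited time target to finish marking.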
1752 class G1CMRemarkTask: public AbstractGangTask {
1753 private:
1754   G1ConcurrentMark* _cm;
1755 public:
1756   void work(uint worker_id) {
1757     G1CMTask* task = _cm->task(worker_id);
1758     task->record_start_time();
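         // The ResourceMark/HandleMark scope bounds the temporary allocations made
         // while walking all threads to flush their nmethod roots and SATB buffers.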
1759     {
1760       ResourceMark rm;
1761       HandleMark hm;
1762 
1763       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1764       Threads::threads_do(&threads_f);
1765     }
1766 
1767     do {
1768       task->do_marking_step(1000000000.0 /* something very large */,
1769                             true         /* do_termination       */,
1770                             false        /* is_serial            */);
1771     } while (task->has_aborted() && !_cm->has_overflown());
1772     // If we overflow, then we do not want to restart. We instead
1773     // want to abort remark and do concurrent marking again.
1774     task->record_end_time();
1775   }
1776 
1777   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1778     AbstractGangTask("Par Remark"), _cm(cm) {
1779     _cm->terminator()->reset_for_reuse(active_workers);
1780   }
1781 };
1782 
1783 void G1ConcurrentMark::checkpoint_roots_final_work() {
1784   ResourceMark rm;
1785   HandleMark   hm;
1786   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1787 
1788   GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1789 
1790   g1h->ensure_parsability(false);
1791 
1792   // this is remark, so we'll use up all active threads
1793   uint active_workers = g1h->workers()->active_workers();
1794   set_concurrency_and_phase(active_workers, false /* concurrent */);
1795   // Leave _parallel_marking_threads at its
1796   // value originally calculated in the G1ConcurrentMark
1797   // constructor and pass the number of active workers
1798   // through the gang in the task.
1799 
1800   {
1801     StrongRootsScope srs(active_workers);
1802 
1803     G1CMRemarkTask remarkTask(this, active_workers);
1804     // We will start all available threads, even if we decide that the
1805     // active_workers will be fewer. The extra ones will just bail out
1806     // immediately.
1807     g1h->workers()->run_task(&remarkTask);
1808   }
1809 
1810   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1811   guarantee(has_overflown() ||
1812             satb_mq_set.completed_buffers_num() == 0,
1813             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1814             BOOL_TO_STR(has_overflown()),
1815             satb_mq_set.completed_buffers_num());
1816 
1817   print_stats();
1818 }
1819 
1820 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1821   _prev_mark_bitmap->clear_range(mr);
1822 }
1823 
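     // Claims a region for the given worker by atomically moving the global finger
     // past it (retrying internally if the CAS loses a race). Returns NULL when the
     // claimed region turns out to be empty or when the finger has reached the end
     // of the heap; in the former case the caller may simply call claim_region() again.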
1824 HeapRegion*
1825 G1ConcurrentMark::claim_region(uint worker_id) {
1826   // "checkpoint" the finger
1827   HeapWord* finger = _finger;
1828 
1829   // _heap_end will not change underneath our feet; it only changes at
1830   // yield points.
1831   while (finger < _heap_end) {
1832     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1833 
1834     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1835     // Make sure that the reads below do not float before loading curr_region.
1836     OrderAccess::loadload();
1837     // The above heap_region_containing() may return NULL, as we always scan and
1838     // claim regions until the end of the heap. In this case, just jump to the next region.
1839     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1840 
1841     // Is the gap between reading the finger and doing the CAS too long?
1842     HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
1843     if (res == finger && curr_region != NULL) {
1844       // we succeeded
1845       HeapWord*   bottom        = curr_region->bottom();
1846       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1847 
1848       // Notice that _finger == end cannot be guaranteed here since
1849       // someone else might have moved the finger even further.
1850       assert(_finger >= end, "the finger should have moved forward");
1851 
1852       if (limit > bottom) {
1853         return curr_region;
1854       } else {
1855         assert(limit == bottom,
1856                "the region limit should be at bottom");
1857         // we return NULL and the caller should try calling
1858         // claim_region() again.
1859         return NULL;
1860       }
1861     } else {
1862       assert(_finger > finger, "the finger should have moved forward");
1863       // read it again
1864       finger = _finger;
1865     }
1866   }
1867 
1868   return NULL;
1869 }
1870 
1871 #ifndef PRODUCT
1872 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1873 private:
1874   G1CollectedHeap* _g1h;
1875   const char* _phase;
1876   int _info;
1877 
1878 public:
1879   VerifyNoCSetOops(const char* phase, int info = -1) :
1880     _g1h(G1CollectedHeap::heap()),
1881     _phase(phase),
1882     _info(info)
1883   { }
1884 
1885   void operator()(G1TaskQueueEntry task_entry) const {
1886     if (task_entry.is_array_slice()) {
1887       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1888       return;
1889     }
1890     guarantee(oopDesc::is_oop(task_entry.obj()),
1891               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1892               p2i(task_entry.obj()), _phase, _info);
1893     guarantee(!_g1h->is_in_cset(task_entry.obj()),
1894               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1895               p2i(task_entry.obj()), _phase, _info);
1896   }
1897 };
1898 
1899 void G1ConcurrentMark::verify_no_cset_oops() {
1900   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1901   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
1902     return;
1903   }
1904 
1905   // Verify entries on the global mark stack
1906   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1907 
1908   // Verify entries on the task queues
1909   for (uint i = 0; i < _max_num_tasks; ++i) {
1910     G1CMTaskQueue* queue = _task_queues->queue(i);
1911     queue->iterate(VerifyNoCSetOops("Queue", i));
1912   }
1913 
1914   // Verify the global finger
1915   HeapWord* global_finger = finger();
1916   if (global_finger != NULL && global_finger < _heap_end) {
1917     // Since we always iterate over all regions, we might get a NULL HeapRegion
1918     // here.
1919     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1920     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1921               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1922               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1923   }
1924 
1925   // Verify the task fingers
1926   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1927   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1928     G1CMTask* task = _tasks[i];
1929     HeapWord* task_finger = task->finger();
1930     if (task_finger != NULL && task_finger < _heap_end) {
1931       // See above note on the global finger verification.
1932       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
1933       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
1934                 !task_hr->in_collection_set(),
1935                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1936                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
1937     }
1938   }
1939 }
1940 #endif // PRODUCT

1941 void G1ConcurrentMark::create_live_data() {
1942   _g1h->g1_rem_set()->create_card_live_data(_concurrent_workers, _next_mark_bitmap);
1943 }
1944 
1945 void G1ConcurrentMark::finalize_live_data() {
1946   _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _next_mark_bitmap);
1947 }
1948 
1949 void G1ConcurrentMark::verify_live_data() {
1950   _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _next_mark_bitmap);
1951 }
1952 
1953 void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
1954   _g1h->g1_rem_set()->clear_card_live_data(workers);
1955 }
1956 
1957 #ifdef ASSERT
1958 void G1ConcurrentMark::verify_live_data_clear() {
1959   _g1h->g1_rem_set()->verify_card_live_data_is_clear();
1960 }
1961 #endif
1962 
1963 void G1ConcurrentMark::print_stats() {
1964   if (!log_is_enabled(Debug, gc, stats)) {
1965     return;
1966   }
1967   log_debug(gc, stats)("---------------------------------------------------------------------");
1968   for (size_t i = 0; i < _num_active_tasks; ++i) {
1969     _tasks[i]->print_stats();
1970     log_debug(gc, stats)("---------------------------------------------------------------------");
1971   }
1972 }
1973 
1974 void G1ConcurrentMark::abort() {
1975   if (!cm_thread()->during_cycle() || _has_aborted) {
1976     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
1977     return;
1978   }
1979 
1980   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
1981   // concurrent bitmap clearing.
1982   {
1983     GCTraceTime(Debug, gc) debug("Clear Next Bitmap");
1984     clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
1985   }
1986   // Note we cannot clear the previous marking bitmap here
1987   // since VerifyDuringGC verifies the objects marked during
1988   // a full GC against the previous bitmap.
1989 
1990   {
1991     GCTraceTime(Debug, gc) debug("Clear Live Data");
1992     clear_live_data(_g1h->workers());
1993   }
1994   DEBUG_ONLY({
1995     GCTraceTime(Debug, gc) debug("Verify Live Data Clear");
1996     verify_live_data_clear();
1997   })
1998   // Empty mark stack
1999   reset_marking_state();
2000   for (uint i = 0; i < _max_num_tasks; ++i) {
2001     _tasks[i]->clear_region_fields();
2002   }
2003   _first_overflow_barrier_sync.abort();
2004   _second_overflow_barrier_sync.abort();
2005   _has_aborted = true;
2006 
2007   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2008   satb_mq_set.abandon_partial_marking();
2009   // This can be called either during or outside marking, we'll read
2010   // the expected_active value from the SATB queue set.
2011   satb_mq_set.set_active_all_threads(
2012                                  false, /* new active value */
2013                                  satb_mq_set.is_active() /* expected_active */);
2014 }
2015 
2016 static void print_ms_time_info(const char* prefix, const char* name,
2017                                NumberSeq& ns) {
2018   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2019                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2020   if (ns.num() > 0) {
2021     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2022                            prefix, ns.sd(), ns.maximum());
2023   }
2024 }
2025 
2026 void G1ConcurrentMark::print_summary_info() {
2027   Log(gc, marking) log;
2028   if (!log.is_trace()) {
2029     return;
2030   }
2031 
2032   log.trace(" Concurrent marking:");
2033   print_ms_time_info("  ", "init marks", _init_times);
2034   print_ms_time_info("  ", "remarks", _remark_times);
2035   {
2036     print_ms_time_info("     ", "final marks", _remark_mark_times);
2037     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2038 
2039   }
2040   print_ms_time_info("  ", "cleanups", _cleanup_times);
2041   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2042             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2043   if (G1ScrubRemSets) {
2044     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2045               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2046   }
2047   log.trace("  Total stop_world time = %8.2f s.",
2048             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2049   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2050             cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
2051 }
2052 
2053 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2054   _concurrent_workers->print_worker_threads_on(st);
2055 }
2056 
2057 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2058   _concurrent_workers->threads_do(tc);
2059 }
2060 
2061 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2062   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2063                p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
2064   _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
2065   _next_mark_bitmap->print_on_error(st, " Next Bits: ");
2066 }
2067 
2068 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2069   ReferenceProcessor* result = g1h->ref_processor_cm();
2070   assert(result != NULL, "CM reference processor should not be NULL");
2071   return result;
2072 }
2073 
2074 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2075                                G1ConcurrentMark* cm,
2076                                G1CMTask* task)
2077   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2078     _g1h(g1h), _cm(cm), _task(task)
2079 { }
2080 
2081 void G1CMTask::setup_for_region(HeapRegion* hr) {
2082   assert(hr != NULL,
2083         "claim_region() should have filtered out NULL regions");
2084   _curr_region  = hr;
2085   _finger       = hr->bottom();
2086   update_region_limit();
2087 }
2088 
2089 void G1CMTask::update_region_limit() {
2090   HeapRegion* hr            = _curr_region;
2091   HeapWord* bottom          = hr->bottom();
2092   HeapWord* limit           = hr->next_top_at_mark_start();
2093 
2094   if (limit == bottom) {
2095     // The region was collected underneath our feet.
2096     // We set the finger to bottom to ensure that the bitmap
2097     // iteration that will follow this will not do anything.
2098     // (this is not a condition that holds when we set the region up,
2099     // as the region is not supposed to be empty in the first place)
2100     _finger = bottom;
2101   } else if (limit >= _region_limit) {
2102     assert(limit >= _finger, "peace of mind");
2103   } else {
2104     assert(limit < _region_limit, "only way to get here");
2105     // This can happen under some pretty unusual circumstances.  An
2106     // evacuation pause empties the region underneath our feet (NTAMS
2107     // at bottom). We then do some allocation in the region (NTAMS
2108     // stays at bottom), followed by the region being used as a GC
2109     // alloc region (NTAMS will move to top() and the objects
2110     // originally below it will be grayed). All objects now marked in
2111     // the region are explicitly grayed, if below the global finger,
2112     // and in fact we do not need to scan anything else. So, we simply
2113     // set _finger to be limit to ensure that the bitmap iteration
2114     // doesn't do anything.
2115     _finger = limit;
2116   }
2117 
2118   _region_limit = limit;
2119 }
2120 
2121 void G1CMTask::giveup_current_region() {
2122   assert(_curr_region != NULL, "invariant");
2123   clear_region_fields();
2124 }
2125 
2126 void G1CMTask::clear_region_fields() {
2127   // Values for these three fields that indicate that we're not
2128   // holding on to a region.
2129   _curr_region   = NULL;
2130   _finger        = NULL;
2131   _region_limit  = NULL;
2132 }
2133 
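     // Installs or clears the oop closure used by this task. The asserts ensure that
     // a closure is never installed over an existing one, nor cleared twice.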
2134 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2135   if (cm_oop_closure == NULL) {
2136     assert(_cm_oop_closure != NULL, "invariant");
2137   } else {
2138     assert(_cm_oop_closure == NULL, "invariant");
2139   }
2140   _cm_oop_closure = cm_oop_closure;
2141 }
2142 
2143 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2144   guarantee(next_mark_bitmap != NULL, "invariant");
2145   _next_mark_bitmap              = next_mark_bitmap;
2146   clear_region_fields();
2147 
2148   _calls                         = 0;
2149   _elapsed_time_ms               = 0.0;
2150   _termination_time_ms           = 0.0;
2151   _termination_start_time_ms     = 0.0;
2152 }
2153 
2154 bool G1CMTask::should_exit_termination() {
2155   regular_clock_call();
2156   // This is called when we are in the termination protocol. We should
2157   // quit if, for some reason, this task wants to abort or the global
2158   // stack is not empty (this means that we can get work from it).
2159   return !_cm->mark_stack_empty() || has_aborted();
2160 }
2161 
2162 void G1CMTask::reached_limit() {
2163   assert(_words_scanned >= _words_scanned_limit ||
2164          _refs_reached >= _refs_reached_limit ,
2165          "shouldn't have been called otherwise");
2166   regular_clock_call();
2167 }
2168 
2169 void G1CMTask::regular_clock_call() {
2170   if (has_aborted()) return;
2171 
2172   // First, we need to recalculate the words scanned and refs reached
2173   // limits for the next clock call.
2174   recalculate_limits();
2175 
2176   // During the regular clock call we do the following
2177 
2178   // (1) If an overflow has been flagged, then we abort.
2179   if (_cm->has_overflown()) {
2180     set_has_aborted();
2181     return;
2182   }
2183 
2184   // If we are not concurrent (i.e. we're doing remark) we don't need
2185   // to check anything else. The other steps are only needed during
2186   // the concurrent marking phase.
2187   if (!_concurrent) {
2188     return;
2189   }
2190 
2191   // (2) If marking has been aborted for Full GC, then we also abort.
2192   if (_cm->has_aborted()) {
2193     set_has_aborted();
2194     return;
2195   }
2196 
2197   double curr_time_ms = os::elapsedVTime() * 1000.0;
2198 
2199   // (3) We check whether we should yield. If we have to, then we abort.
2200   if (SuspendibleThreadSet::should_yield()) {
2201     // We should yield. To do this we abort the task. The caller is
2202     // responsible for yielding.
2203     set_has_aborted();
2204     return;
2205   }
2206 
2207   // (4) We check whether we've reached our time quota. If we have,
2208   // then we abort.
2209   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2210   if (elapsed_time_ms > _time_target_ms) {
2211     set_has_aborted();
2212     _has_timed_out = true;
2213     return;
2214   }
2215 
2216   // (5) Finally, we check whether there are enough completed SATB
2217   // buffers available for processing. If there are, we abort.
2218   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2219   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2220     // We do need to process SATB buffers, so we'll abort and restart
2221     // the marking task to do so.
2222     set_has_aborted();
2223     return;
2224   }
2225 }
2226 
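     // The work-based "clock": after roughly words_scanned_period words scanned or
     // refs_reached_period references visited, regular_clock_call() re-checks the
     // abort conditions. This resets the limits for the next such period.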
2227 void G1CMTask::recalculate_limits() {
2228   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2229   _words_scanned_limit      = _real_words_scanned_limit;
2230 
2231   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2232   _refs_reached_limit       = _real_refs_reached_limit;
2233 }
2234 
2235 void G1CMTask::decrease_limits() {
2236   // This is called when we believe that we're going to do an infrequent
2237   // operation which will increase the per byte scanned cost (i.e. move
2238   // entries to/from the global stack). It basically tries to decrease the
2239   // scanning limit so that the clock is called earlier.
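       //
       // For example, assuming (purely for illustration) a words_scanned_period of
       // 12*1024 words, this leaves at most a quarter of the period (about 3*1024
       // words) before the next clock call, instead of a full period.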
2240 
2241   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2242   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2243 }
2244 
2245 void G1CMTask::move_entries_to_global_stack() {
2246   // Local array where we'll store the entries that will be popped
2247   // from the local queue.
2248   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2249 
2250   size_t n = 0;
2251   G1TaskQueueEntry task_entry;
2252   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2253     buffer[n] = task_entry;
2254     ++n;
2255   }
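       // If the chunk is not full, terminate it with an empty entry so that
       // get_entries_from_global_stack() knows where the valid entries end.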
2256   if (n < G1CMMarkStack::EntriesPerChunk) {
2257     buffer[n] = G1TaskQueueEntry();
2258   }
2259 
2260   if (n > 0) {
2261     if (!_cm->mark_stack_push(buffer)) {
2262       set_has_aborted();
2263     }
2264   }
2265 
2266   // This operation was quite expensive, so decrease the limits.
2267   decrease_limits();
2268 }
2269 
2270 bool G1CMTask::get_entries_from_global_stack() {
2271   // Local array where we'll store the entries that will be popped
2272   // from the global stack.
2273   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2274 
2275   if (!_cm->mark_stack_pop(buffer)) {
2276     return false;
2277   }
2278 
2279   // We did actually pop at least one entry.
2280   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2281     G1TaskQueueEntry task_entry = buffer[i];
2282     if (task_entry.is_null()) {
2283       break;
2284     }
2285     assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2286     bool success = _task_queue->push(task_entry);
2287     // We only call this when the local queue is empty or under a
2288     // given target limit. So, we do not expect this push to fail.
2289     assert(success, "invariant");
2290   }
2291 
2292   // This operation was quite expensive, so decrease the limits
2293   decrease_limits();
2294   return true;
2295 }
2296 
2297 void G1CMTask::drain_local_queue(bool partially) {
2298   if (has_aborted()) {
2299     return;
2300   }
2301 
2302   // Decide what the target size is, depending on whether we're going to
2303   // drain it partially (so that other tasks can steal if they run out
2304   // of things to do) or totally (at the very end).
2305   size_t target_size;
2306   if (partially) {
2307     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2308   } else {
2309     target_size = 0;
2310   }
2311 
2312   if (_task_queue->size() > target_size) {
2313     G1TaskQueueEntry entry;
2314     bool ret = _task_queue->pop_local(entry);
2315     while (ret) {
2316       scan_task_entry(entry);
2317       if (_task_queue->size() <= target_size || has_aborted()) {
2318         ret = false;
2319       } else {
2320         ret = _task_queue->pop_local(entry);
2321       }
2322     }
2323   }
2324 }
2325 
2326 void G1CMTask::drain_global_stack(bool partially) {
2327   if (has_aborted()) return;
2328 
2329   // We have a policy to drain the local queue before we attempt to
2330   // drain the global stack.
2331   assert(partially || _task_queue->size() == 0, "invariant");
2332 
2333   // Decide what the target size is, depending on whether we're going to
2334   // drain it partially (so that other tasks can steal if they run out
2335   // of things to do) or totally (at the very end).
2336   // Notice that when draining the global mark stack partially, due to the raciness
2337   // of the mark stack size update we might in fact drop below the target. But,
2338   // this is not a problem.
2339   // In case of total draining, we simply process until the global mark stack is
2340   // totally empty, disregarding the size counter.
2341   if (partially) {
2342     size_t const target_size = _cm->partial_mark_stack_size_target();
2343     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2344       if (get_entries_from_global_stack()) {
2345         drain_local_queue(partially);
2346       }
2347     }
2348   } else {
2349     while (!has_aborted() && get_entries_from_global_stack()) {
2350       drain_local_queue(partially);
2351     }
2352   }
2353 }
2354 
2355 // The SATB queue code makes several assumptions about whether to call the par or
2356 // non-par versions of the methods. This is why some of the code is
2357 // replicated. We should really get rid of the single-threaded version
2358 // of the code to simplify things.
2359 void G1CMTask::drain_satb_buffers() {
2360   if (has_aborted()) return;
2361 
2362   // We set this so that the regular clock knows that we're in the
2363   // middle of draining buffers and doesn't set the abort flag when it
2364   // notices that SATB buffers are available for draining. It'd be
2365   // very counterproductive if it did that. :-)
2366   _draining_satb_buffers = true;
2367 
2368   G1CMSATBBufferClosure satb_cl(this, _g1h);
2369   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2370 
2371   // This keeps claiming and applying the closure to completed buffers
2372   // until we run out of buffers or we need to abort.
2373   while (!has_aborted() &&
2374          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2375     regular_clock_call();
2376   }
2377 
2378   _draining_satb_buffers = false;
2379 
2380   assert(has_aborted() ||
2381          _concurrent ||
2382          satb_mq_set.completed_buffers_num() == 0, "invariant");
2383 
2384   // Again, this was a potentially expensive operation, so decrease the
2385   // limits to get the regular clock call earlier.
2386   decrease_limits();
2387 }
2388 
2389 void G1CMTask::print_stats() {
2390   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u",
2391                        _worker_id, _calls);
2392   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2393                        _elapsed_time_ms, _termination_time_ms);
2394   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2395                        _step_times_ms.num(), _step_times_ms.avg(),
2396                        _step_times_ms.sd());
2397   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
2398                        _step_times_ms.maximum(), _step_times_ms.sum());
2399 }
2400 
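     // Attempts to steal a task queue entry from another worker's local queue;
     // returns true and stores the stolen entry in task_entry on success.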
2401 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2402   return _task_queues->steal(worker_id, hash_seed, task_entry);
2403 }
2404 
2405 /*****************************************************************************
2406 
2407     The do_marking_step(time_target_ms, ...) method is the building
2408     block of the parallel marking framework. It can be called in parallel
2409     with other invocations of do_marking_step() on different tasks
2410     (but only one per task, obviously) and concurrently with the
2411     mutator threads, or during remark, hence it eliminates the need
2412     for two versions of the code. When called during remark, it will
2413     pick up from where the task left off during the concurrent marking
2414     phase. Interestingly, tasks are also claimable during evacuation
2415     pauses, since do_marking_step() ensures that it aborts before
2416     it needs to yield.
2417 
2418     The data structures that it uses to do marking work are the
2419     following:
2420 
2421       (1) Marking Bitmap. If there are gray objects that appear only
2422       on the bitmap (this happens either when dealing with an overflow
2423       or when the initial marking phase has simply marked the roots
2424       and didn't push them on the stack), then tasks claim heap
2425       regions whose bitmap they then scan to find gray objects. A
2426       global finger indicates where the end of the last claimed region
2427       is. A local finger indicates how far into the region a task has
2428       scanned. The two fingers are used to determine how to gray an
2429       object (i.e. whether simply marking it is OK, as it will be
2430       visited by a task in the future, or whether it also needs to be
2431       pushed on a stack).
2432 
2433       (2) Local Queue. The local queue of the task which is accessed
2434       reasonably efficiently by the task. Other tasks can steal from
2435       it when they run out of work. Throughout the marking phase, a
2436       task attempts to keep its local queue short but not totally
2437       empty, so that entries are available for stealing by other
2438       tasks. Only when there is no more work, a task will totally
2439       drain its local queue.
2440 
2441       (3) Global Mark Stack. This handles local queue overflow. During
2442       marking only sets of entries are moved between it and the local
2443       queues, as access to it requires a mutex and more fine-grained
2444       interaction with it, which might cause contention. If it
2445       overflows, then the marking phase should restart and iterate
2446       over the bitmap to identify gray objects. Throughout the marking
2447       phase, tasks attempt to keep the global mark stack at a small
2448       length but not totally empty, so that entries are available for
2449       popping by other tasks. Only when there is no more work, tasks
2450       will totally drain the global mark stack.
2451 
2452       (4) SATB Buffer Queue. This is where completed SATB buffers are
2453       made available. Buffers are regularly removed from this queue
2454       and scanned for roots, so that the queue doesn't get too
2455       long. During remark, all completed buffers are processed, as
2456       well as the filled in parts of any uncompleted buffers.
2457 
2458     The do_marking_step() method tries to abort when the time target
2459     has been reached. There are a few other cases when the
2460     do_marking_step() method also aborts:
2461 
2462       (1) When the marking phase has been aborted (after a Full GC).
2463 
2464       (2) When a global overflow (on the global stack) has been
2465       triggered. Before the task aborts, it will actually sync up with
2466       the other tasks to ensure that all the marking data structures
2467       (local queues, stacks, fingers etc.)  are re-initialized so that
2468       when do_marking_step() completes, the marking phase can
2469       immediately restart.
2470 
2471       (3) When enough completed SATB buffers are available. The
2472       do_marking_step() method only tries to drain SATB buffers right
2473       at the beginning. So, if enough buffers are available, the
2474       marking step aborts and the SATB buffers are processed at
2475       the beginning of the next invocation.
2476 
2477       (4) To yield. When we have to yield then we abort and yield
2478       right at the end of do_marking_step(). This saves us from a lot
2479       of hassle as, by yielding we might allow a Full GC. If this
2480       happens then objects will be compacted underneath our feet, the
2481       heap might shrink, etc. We save checking for this by just
2482       aborting and doing the yield right at the end.
2483 
2484     From the above it follows that the do_marking_step() method should
2485     be called in a loop (or, otherwise, regularly) until it completes.
2486 
2487     If a marking step completes without its has_aborted() flag being
2488     true, it means it has completed the current marking phase (and
2489     also all other marking tasks have done so and have all synced up).
2490 
2491     A method called regular_clock_call() is invoked "regularly" (in
2492     sub ms intervals) throughout marking. It is this clock method that
2493     checks all the abort conditions which were mentioned above and
2494     decides when the task should abort. A work-based scheme is used to
2495     trigger this clock method: when the number of object words the
2496     marking phase has scanned or the number of references the marking
2497     phase has visited reach a given limit. Additional invocations of
2498     the clock method have been planted in a few other strategic places
2499     too. The initial reason for the clock method was to avoid calling
2500     vtime too regularly, as it is quite expensive. So, once it was in
2501     place, it was natural to piggy-back all the other conditions on it
2502     too and not constantly check them throughout the code.
2503 
2504     If do_termination is true then do_marking_step will enter its
2505     termination protocol.
2506 
2507     The value of is_serial must be true when do_marking_step is being
2508     called serially (i.e. by the VMThread) and do_marking_step should
2509     skip any synchronization in the termination and overflow code.
2510     Examples include the serial remark code and the serial reference
2511     processing closures.
2512 
2513     The value of is_serial must be false when do_marking_step is
2514     being called by any of the worker threads in a work gang.
2515     Examples include the concurrent marking code (CMMarkingTask),
2516     the MT remark code, and the MT reference processing closures.
2517 
2518  *****************************************************************************/
2519 
2520 void G1CMTask::do_marking_step(double time_target_ms,
2521                                bool do_termination,
2522                                bool is_serial) {
2523   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2524   assert(_concurrent == _cm->concurrent(), "they should be the same");
2525 
2526   _start_time_ms = os::elapsedVTime() * 1000.0;
2527 
2528   // If do_stealing is true then do_marking_step will attempt to
2529   // steal work from the other G1CMTasks. It only makes sense to
2530   // enable stealing when the termination protocol is enabled
2531   // and do_marking_step() is not being called serially.
2532   bool do_stealing = do_termination && !is_serial;
2533 
2534   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2535   _time_target_ms = time_target_ms - diff_prediction_ms;
2536 
2537   // set up the variables that are used in the work-based scheme to
2538   // call the regular clock method
2539   _words_scanned = 0;
2540   _refs_reached  = 0;
2541   recalculate_limits();
2542 
2543   // clear all flags
2544   clear_has_aborted();
2545   _has_timed_out = false;
2546   _draining_satb_buffers = false;
2547 
2548   ++_calls;
2549 
2550   // Set up the bitmap and oop closures. Anything that uses them is
2551   // eventually called from this method, so it is OK to allocate these
2552   // statically.
2553   G1CMBitMapClosure bitmap_closure(this, _cm);
2554   G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
2555   set_cm_oop_closure(&cm_oop_closure);
2556 
2557   if (_cm->has_overflown()) {
2558     // This can happen if the mark stack overflows during a GC pause
2559     // and this task, after a yield point, restarts. We have to abort
2560     // as we need to get into the overflow protocol which happens
2561     // right at the end of this task.
2562     set_has_aborted();
2563   }
2564 
2565   // First drain any available SATB buffers. After this, we will not
2566   // look at SATB buffers before the next invocation of this method.
2567   // If enough completed SATB buffers are queued up, the regular clock
2568   // will abort this task so that it restarts.
2569   drain_satb_buffers();
2570   // ...then partially drain the local queue and the global stack
2571   drain_local_queue(true);
2572   drain_global_stack(true);
2573 
2574   do {
2575     if (!has_aborted() && _curr_region != NULL) {
2576       // This means that we're already holding on to a region.
2577       assert(_finger != NULL, "if region is not NULL, then the finger "
2578              "should not be NULL either");
2579 
2580       // We might have restarted this task after an evacuation pause
2581       // which might have evacuated the region we're holding on to
2582       // underneath our feet. Let's read its limit again to make sure
2583       // that we do not iterate over a region of the heap that
2584       // contains garbage (update_region_limit() will also move
2585       // _finger to the start of the region if it is found empty).
2586       update_region_limit();
2587       // We will start from _finger not from the start of the region,
2588       // as we might be restarting this task after aborting half-way
2589       // through scanning this region. In this case, _finger points to
2590       // the address where we last found a marked object. If this is a
2591       // fresh region, _finger points to start().
2592       MemRegion mr = MemRegion(_finger, _region_limit);
2593 
2594       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2595              "humongous regions should go around loop once only");
2596 
2597       // Some special cases:
2598       // If the memory region is empty, we can just give up the region.
2599       // If the current region is humongous then we only need to check
2600       // the bitmap for the bit associated with the start of the object,
2601       // scan the object if it's live, and give up the region.
2602       // Otherwise, let's iterate over the bitmap of the part of the region
2603       // that is left.
2604       // If the iteration is successful, give up the region.
2605       if (mr.is_empty()) {
2606         giveup_current_region();
2607         regular_clock_call();
2608       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2609         if (_next_mark_bitmap->is_marked(mr.start())) {
2610           // The object is marked - apply the closure
2611           bitmap_closure.do_addr(mr.start());
2612         }
2613         // Even if this task aborted while scanning the humongous object
2614         // we can (and should) give up the current region.
2615         giveup_current_region();
2616         regular_clock_call();
2617       } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
2618         giveup_current_region();
2619         regular_clock_call();
2620       } else {
2621         assert(has_aborted(), "currently the only way to do so");
2622         // The only way to abort the bitmap iteration is to return
2623         // false from the do_addr() method. However, inside the
2624         // do_addr() method we move the _finger to point to the
2625         // object currently being looked at. So, if we bail out, we
2626         // have definitely set _finger to something non-null.
2627         assert(_finger != NULL, "invariant");
2628 
2629         // Region iteration was actually aborted. So now _finger
2630         // points to the address of the object we last scanned. If we
2631         // leave it there, when we restart this task, we will rescan
2632         // the object. It is easy to avoid this. We move the finger by
2633         // enough to point to the next possible object header.
2634         assert(_finger < _region_limit, "invariant");
2635         HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2636         // Check if bitmap iteration was aborted while scanning the last object
2637         if (new_finger >= _region_limit) {
2638           giveup_current_region();
2639         } else {
2640           move_finger_to(new_finger);
2641         }
2642       }
2643     }
2644     // At this point we have either completed iterating over the
2645     // region we were holding on to, or we have aborted.
2646 
2647     // We then partially drain the local queue and the global stack.
2648     // (Do we really need this?)
2649     drain_local_queue(true);
2650     drain_global_stack(true);
2651 
2652     // Read the note on the claim_region() method on why it might
2653     // return NULL with potentially more regions available for
2654     // claiming and why we have to check out_of_regions() to determine
2655     // whether we're done or not.
2656     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2657       // We are going to try to claim a new region. We should have
2658       // given up on the previous one.
2659       // Separated the asserts so that we know which one fires.
2660       assert(_curr_region  == NULL, "invariant");
2661       assert(_finger       == NULL, "invariant");
2662       assert(_region_limit == NULL, "invariant");
2663       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2664       if (claimed_region != NULL) {
2665         // Yes, we managed to claim one
2666         setup_for_region(claimed_region);
2667         assert(_curr_region == claimed_region, "invariant");
2668       }
2669       // It is important to call the regular clock here. It might take
2670       // a while to claim a region if, for example, we hit a large
2671       // block of empty regions. So we need to call the regular clock
2672       // method once round the loop to make sure it's called
2673       // frequently enough.
2674       regular_clock_call();
2675     }
2676 
2677     if (!has_aborted() && _curr_region == NULL) {
2678       assert(_cm->out_of_regions(),
2679              "at this point we should be out of regions");
2680     }
2681   } while ( _curr_region != NULL && !has_aborted());
2682 
2683   if (!has_aborted()) {
2684     // We cannot check whether the global stack is empty, since other
2685     // tasks might be pushing objects to it concurrently.
2686     assert(_cm->out_of_regions(),
2687            "at this point we should be out of regions");
2688     // Try to reduce the number of available SATB buffers so that
2689     // remark has less work to do.
2690     drain_satb_buffers();
2691   }
2692 
2693   // Since we've done everything else, we can now totally drain the
2694   // local queue and global stack.
2695   drain_local_queue(false);
2696   drain_global_stack(false);
2697 
2698   // Attempt to steal work from other tasks' queues.
2699   if (do_stealing && !has_aborted()) {
2700     // We have not aborted. This means that we have finished all that
2701     // we could. Let's try to do some stealing...
2702 
2703     // We cannot check whether the global stack is empty, since other
2704     // tasks might be pushing objects to it concurrently.
2705     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2706            "only way to reach here");
2707     while (!has_aborted()) {
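           // Try to steal an entry from another task's queue; _hash_seed
           // randomizes which victim queues are probed.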
2708       G1TaskQueueEntry entry;
2709       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2710         scan_task_entry(entry);
2711 
2712         // And since we're towards the end, let's totally drain the
2713         // local queue and global stack.
2714         drain_local_queue(false);
2715         drain_global_stack(false);
2716       } else {
2717         break;
2718       }
2719     }
2720   }
2721 
2722   // If we still haven't aborted, let's try to get into the
2723   // termination protocol.
2724   if (do_termination && !has_aborted()) {
2725     // We cannot check whether the global stack is empty, since other
2726     // tasks might be concurrently pushing objects on it.
2727     // Separated the asserts so that we know which one fires.
2728     assert(_cm->out_of_regions(), "only way to reach here");
2729     assert(_task_queue->size() == 0, "only way to reach here");
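         // Time how long this task spends in the termination protocol,
         // using per-thread virtual (CPU) time in ms.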
2730     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2731 
2732     // The G1CMTask class also extends the TerminatorTerminator class,
2733     // so its should_exit_termination() method also decides whether
2734     // to exit the termination protocol.
2735     bool finished = (is_serial ||
2736                      _cm->terminator()->offer_termination(this));
2737     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2738     _termination_time_ms +=
2739       termination_end_time_ms - _termination_start_time_ms;
2740 
2741     if (finished) {
2742       // We're all done.
2743 
2744       if (_worker_id == 0) {
2745         // Let's allow task 0 to do this
2746         if (_concurrent) {
2747           assert(_cm->concurrent_marking_in_progress(), "invariant");
2748           // We need to set this to false before the next
2749           // safepoint. This way we ensure that the marking phase
2750           // doesn't observe any more heap expansions.
2751           _cm->clear_concurrent_marking_in_progress();
2752         }
2753       }
2754 
2755       // We can now guarantee that the global stack is empty, since
2756       // all other tasks have finished. We separated the guarantees so
2757       // that, if a condition is false, we can immediately find out
2758       // which one.
2759       guarantee(_cm->out_of_regions(), "only way to reach here");
2760       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2761       guarantee(_task_queue->size() == 0, "only way to reach here");
2762       guarantee(!_cm->has_overflown(), "only way to reach here");
2763     } else {
2764       // Apparently there's more work to do. Let's abort this task; the
2765       // caller will restart it and we can hopefully find more things to do.
2766       set_has_aborted();
2767     }
2768   }
2769 
2770   // Mainly for debugging purposes to make sure that a pointer to the
2771   // closure which was stack-allocated in this frame doesn't
2772   // escape it by accident.
2773   set_cm_oop_closure(NULL);
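       // os::elapsedVTime() reports seconds, so scale to ms before
       // recording the step statistics.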
2774   double end_time_ms = os::elapsedVTime() * 1000.0;
2775   double elapsed_time_ms = end_time_ms - _start_time_ms;
2776   // Update the step history.
2777   _step_times_ms.add(elapsed_time_ms);
2778 
2779   if (has_aborted()) {
2780     // The task was aborted for some reason.
2781     if (_has_timed_out) {
2782       double diff_ms = elapsed_time_ms - _time_target_ms;
2783       // Keep statistics of how well we did with respect to hitting
2784       // our target only if we actually timed out (if we aborted for
2785       // other reasons, then the results might get skewed).
2786       _marking_step_diffs_ms.add(diff_ms);
2787     }
2788 
2789     if (_cm->has_overflown()) {
2790       // This is the interesting one. We aborted because a global
2791       // overflow was raised. This means we have to restart the
2792       // marking phase and start iterating over regions. However, in
2793       // order to do this we have to make sure that all tasks stop
2794       // what they are doing and re-initialize in a safe manner. We
2795       // will achieve this with the use of two barrier sync points.
2796 
2797       if (!is_serial) {
2798         // We only need to enter the sync barrier if being called
2799         // from a parallel context
2800         _cm->enter_first_sync_barrier(_worker_id);
2801 
2802         // When we exit this sync barrier we know that all tasks have
2803         // stopped doing marking work. So, it's now safe to
2804         // re-initialize our data structures. At the end of this method,
2805         // task 0 will clear the global data structures.
2806       }
2807 
2808       // We clear the local state of this task...
2809       clear_region_fields();
2810 
2811       if (!is_serial) {
2812         // ...and enter the second barrier.
2813         _cm->enter_second_sync_barrier(_worker_id);
2814       }
2815       // At this point, if we're in the concurrent phase of
2816       // marking, everything has been re-initialized and we're
2817       // ready to restart.
2818     }
2819   }
2820 }
2821 
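     // Construct the per-worker marking task. Fields such as the mark
     // bitmap, the time target and the region being scanned are left
     // unset here and are filled in later, before the task runs.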
2822 G1CMTask::G1CMTask(uint worker_id, G1ConcurrentMark* cm, G1CMTaskQueue* task_queue) :
2823   _objArray_processor(this),
2824   _worker_id(worker_id),
2825   _g1h(G1CollectedHeap::heap()),
2826   _cm(cm),
2827   _next_mark_bitmap(NULL),
2828   _task_queue(task_queue),
2829   _calls(0),
2830   _time_target_ms(0.0),
2831   _start_time_ms(0.0),
2832   _cm_oop_closure(NULL),
2833   _curr_region(NULL),
2834   _finger(NULL),
2835   _region_limit(NULL),
2836   _words_scanned(0),
2837   _words_scanned_limit(0),
2838   _real_words_scanned_limit(0),
2839   _refs_reached(0),
2840   _refs_reached_limit(0),
2841   _real_refs_reached_limit(0),
2842   _hash_seed(17),
2843   _has_aborted(false),
2844   _has_timed_out(false),
2845   _draining_satb_buffers(false),
2846   _step_times_ms(),
2847   _elapsed_time_ms(0.0),
2848   _termination_time_ms(0.0),
2849   _termination_start_time_ms(0.0),
2850   _concurrent(false),
2851   _marking_step_diffs_ms()
2852 {
2853   guarantee(task_queue != NULL, "invariant");
2854 
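       // Seed the step time diff statistics with an initial 0.5 ms sample,
       // presumably so that early predictions do not start from an empty
       // sequence.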
2855   _marking_step_diffs_ms.add(0.5);
2856 }
2857 
2858 // These are formatting macros that are used below to ensure
2859 // consistent formatting. The *_H_* versions are used to format the
2860 // header for a particular value and they should be kept consistent
2861 // with the corresponding macro. Also note that most of the macros add
2862 // the necessary white space (as a prefix), which makes them a bit
2863 // easier to compose.
2864 
2865 // All the output lines are prefixed with this string so that they can
2866 // be identified easily in a large log file.
2867 #define G1PPRL_LINE_PREFIX            "###"
2868 
2869 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2870 #ifdef _LP64
2871 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2872 #else // _LP64
2873 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2874 #endif // _LP64
2875 
2876 // For per-region info
2877 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2878 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2879 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
2880 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2881 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
2882 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
2883 
2884 // For summary info
2885 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2886 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
2887 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2888 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
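
     // As an example of how these compose: the per-region closure below
     // concatenates G1PPRL_LINE_PREFIX, G1PPRL_TYPE_FORMAT,
     // G1PPRL_ADDR_BASE_FORMAT and the byte/double formats into a single
     // format string and passes it to log_trace(gc, liveness).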
2889 
2890 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2891   _total_used_bytes(0), _total_capacity_bytes(0),
2892   _total_prev_live_bytes(0), _total_next_live_bytes(0),
2893   _total_remset_bytes(0), _total_strong_code_roots_bytes(0)
2894 {
2895   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2896   MemRegion g1_reserved = g1h->g1_reserved();
2897   double now = os::elapsedTime();
2898 
2899   // Print the header of the output.
2900   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
2901   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
2902                           G1PPRL_SUM_ADDR_FORMAT("reserved")
2903                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
2904                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
2905                           HeapRegion::GrainBytes);
2906   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2907   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2908                           G1PPRL_TYPE_H_FORMAT
2909                           G1PPRL_ADDR_BASE_H_FORMAT
2910                           G1PPRL_BYTE_H_FORMAT
2911                           G1PPRL_BYTE_H_FORMAT
2912                           G1PPRL_BYTE_H_FORMAT
2913                           G1PPRL_DOUBLE_H_FORMAT
2914                           G1PPRL_BYTE_H_FORMAT
2915                           G1PPRL_BYTE_H_FORMAT,
2916                           "type", "address-range",
2917                           "used", "prev-live", "next-live", "gc-eff",
2918                           "remset", "code-roots");
2919   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2920                           G1PPRL_TYPE_H_FORMAT
2921                           G1PPRL_ADDR_BASE_H_FORMAT
2922                           G1PPRL_BYTE_H_FORMAT
2923                           G1PPRL_BYTE_H_FORMAT
2924                           G1PPRL_BYTE_H_FORMAT
2925                           G1PPRL_DOUBLE_H_FORMAT
2926                           G1PPRL_BYTE_H_FORMAT
2927                           G1PPRL_BYTE_H_FORMAT,
2928                           "", "",
2929                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
2930                           "(bytes)", "(bytes)");
2931 }
2932 
2933 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) {
2934   const char* type       = r->get_type_str();
2935   HeapWord* bottom       = r->bottom();
2936   HeapWord* end          = r->end();
2937   size_t capacity_bytes  = r->capacity();
2938   size_t used_bytes      = r->used();
2939   size_t prev_live_bytes = r->live_bytes();
2940   size_t next_live_bytes = r->next_live_bytes();
2941   double gc_eff          = r->gc_efficiency();
2942   size_t remset_bytes    = r->rem_set()->mem_size();
2943   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
2944 
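       // Accumulate this region's values into the totals that the
       // destructor prints in the summary line.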
2945   _total_used_bytes      += used_bytes;
2946   _total_capacity_bytes  += capacity_bytes;
2947   _total_prev_live_bytes += prev_live_bytes;
2948   _total_next_live_bytes += next_live_bytes;
2949   _total_remset_bytes    += remset_bytes;
2950   _total_strong_code_roots_bytes += strong_code_roots_bytes;
2951 
2952   // Print a line for this particular region.
2953   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2954                           G1PPRL_TYPE_FORMAT
2955                           G1PPRL_ADDR_BASE_FORMAT
2956                           G1PPRL_BYTE_FORMAT
2957                           G1PPRL_BYTE_FORMAT
2958                           G1PPRL_BYTE_FORMAT
2959                           G1PPRL_DOUBLE_FORMAT
2960                           G1PPRL_BYTE_FORMAT
2961                           G1PPRL_BYTE_FORMAT,
2962                           type, p2i(bottom), p2i(end),
2963                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
2964                           remset_bytes, strong_code_roots_bytes);
2965 
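       // Returning false keeps the heap region iteration going.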
2966   return false;
2967 }
2968 
2969 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
2970   // Add the free list and static memory overhead to the remembered set sizes.
2971   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
2972   // Print the footer of the output.
2973   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2974   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2975                          " SUMMARY"
2976                          G1PPRL_SUM_MB_FORMAT("capacity")
2977                          G1PPRL_SUM_MB_PERC_FORMAT("used")
2978                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
2979                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
2980                          G1PPRL_SUM_MB_FORMAT("remset")
2981                          G1PPRL_SUM_MB_FORMAT("code-roots"),
2982                          bytes_to_mb(_total_capacity_bytes),
2983                          bytes_to_mb(_total_used_bytes),
2984                          percent_of(_total_used_bytes, _total_capacity_bytes),
2985                          bytes_to_mb(_total_prev_live_bytes),
2986                          percent_of(_total_prev_live_bytes, _total_capacity_bytes),
2987                          bytes_to_mb(_total_next_live_bytes),
2988                          percent_of(_total_next_live_bytes, _total_capacity_bytes),
2989                          bytes_to_mb(_total_remset_bytes),
2990                          bytes_to_mb(_total_strong_code_roots_bytes));
2991 }