1 /* 2 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/metadataOnStackMark.hpp" 27 #include "classfile/symbolTable.hpp" 28 #include "code/codeCache.hpp" 29 #include "gc/g1/concurrentMarkThread.inline.hpp" 30 #include "gc/g1/g1CollectedHeap.inline.hpp" 31 #include "gc/g1/g1CollectorState.hpp" 32 #include "gc/g1/g1ConcurrentMark.inline.hpp" 33 #include "gc/g1/g1HeapVerifier.hpp" 34 #include "gc/g1/g1OopClosures.inline.hpp" 35 #include "gc/g1/g1CardLiveData.inline.hpp" 36 #include "gc/g1/g1Policy.hpp" 37 #include "gc/g1/g1StringDedup.hpp" 38 #include "gc/g1/heapRegion.inline.hpp" 39 #include "gc/g1/heapRegionRemSet.hpp" 40 #include "gc/g1/heapRegionSet.inline.hpp" 41 #include "gc/g1/suspendibleThreadSet.hpp" 42 #include "gc/shared/gcId.hpp" 43 #include "gc/shared/gcTimer.hpp" 44 #include "gc/shared/gcTrace.hpp" 45 #include "gc/shared/gcTraceTime.inline.hpp" 46 #include "gc/shared/genOopClosures.inline.hpp" 47 #include "gc/shared/referencePolicy.hpp" 48 #include "gc/shared/strongRootsScope.hpp" 49 #include "gc/shared/taskqueue.inline.hpp" 50 #include "gc/shared/vmGCOperations.hpp" 51 #include "logging/log.hpp" 52 #include "memory/allocation.hpp" 53 #include "memory/resourceArea.hpp" 54 #include "oops/oop.inline.hpp" 55 #include "runtime/atomic.hpp" 56 #include "runtime/handles.inline.hpp" 57 #include "runtime/java.hpp" 58 #include "runtime/prefetch.inline.hpp" 59 #include "services/memTracker.hpp" 60 #include "utilities/align.hpp" 61 #include "utilities/growableArray.hpp" 62 63 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) { 64 assert(addr < _cm->finger(), "invariant"); 65 assert(addr >= _task->finger(), "invariant"); 66 67 // We move that task's local finger along. 
68 _task->move_finger_to(addr); 69 70 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); 71 // we only partially drain the local queue and global stack 72 _task->drain_local_queue(true); 73 _task->drain_global_stack(true); 74 75 // if the has_aborted flag has been raised, we need to bail out of 76 // the iteration 77 return !_task->has_aborted(); 78 } 79 80 G1CMMarkStack::G1CMMarkStack() : 81 _max_chunk_capacity(0), 82 _base(NULL), 83 _chunk_capacity(0) { 84 set_empty(); 85 } 86 87 bool G1CMMarkStack::resize(size_t new_capacity) { 88 assert(is_empty(), "Only resize when stack is empty."); 89 assert(new_capacity <= _max_chunk_capacity, 90 "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity); 91 92 TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC); 93 94 if (new_base == NULL) { 95 log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk)); 96 return false; 97 } 98 // Release old mapping. 99 if (_base != NULL) { 100 MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity); 101 } 102 103 _base = new_base; 104 _chunk_capacity = new_capacity; 105 set_empty(); 106 107 return true; 108 } 109 110 size_t G1CMMarkStack::capacity_alignment() { 111 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry); 112 } 113 114 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) { 115 guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized."); 116 117 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry); 118 119 _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; 120 size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; 121 122 guarantee(initial_chunk_capacity <= _max_chunk_capacity, 123 "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT, 124 _max_chunk_capacity, 125 initial_chunk_capacity); 126 127 log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT, 128 initial_chunk_capacity, _max_chunk_capacity); 129 130 return resize(initial_chunk_capacity); 131 } 132 133 void G1CMMarkStack::expand() { 134 if (_chunk_capacity == _max_chunk_capacity) { 135 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity); 136 return; 137 } 138 size_t old_capacity = _chunk_capacity; 139 // Double capacity if possible 140 size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity); 141 142 if (resize(new_capacity)) { 143 log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 144 old_capacity, new_capacity); 145 } else { 146 log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", 147 old_capacity, new_capacity); 148 } 149 } 150 151 G1CMMarkStack::~G1CMMarkStack() { 152 if (_base != NULL) { 153 MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity); 154 } 155 } 156 157 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) { 158 elem->next = *list; 159 *list = elem; 160 } 161 162 void 
G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) { 163 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 164 add_chunk_to_list(&_chunk_list, elem); 165 _chunks_in_chunk_list++; 166 } 167 168 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) { 169 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 170 add_chunk_to_list(&_free_list, elem); 171 } 172 173 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) { 174 TaskQueueEntryChunk* result = *list; 175 if (result != NULL) { 176 *list = (*list)->next; 177 } 178 return result; 179 } 180 181 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() { 182 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); 183 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list); 184 if (result != NULL) { 185 _chunks_in_chunk_list--; 186 } 187 return result; 188 } 189 190 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() { 191 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); 192 return remove_chunk_from_list(&_free_list); 193 } 194 195 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() { 196 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code. 197 // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding 198 // wraparound of _hwm. 199 if (_hwm >= _chunk_capacity) { 200 return NULL; 201 } 202 203 size_t cur_idx = Atomic::add(1u, &_hwm) - 1; 204 if (cur_idx >= _chunk_capacity) { 205 return NULL; 206 } 207 208 TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk; 209 result->next = NULL; 210 return result; 211 } 212 213 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) { 214 // Get a new chunk. 215 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list(); 216 217 if (new_chunk == NULL) { 218 // Did not get a chunk from the free list. Allocate from backing memory. 219 new_chunk = allocate_new_chunk(); 220 221 if (new_chunk == NULL) { 222 return false; 223 } 224 } 225 226 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry)); 227 228 add_chunk_to_chunk_list(new_chunk); 229 230 return true; 231 } 232 233 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) { 234 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list(); 235 236 if (cur == NULL) { 237 return false; 238 } 239 240 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry)); 241 242 add_chunk_to_free_list(cur); 243 return true; 244 } 245 246 void G1CMMarkStack::set_empty() { 247 _chunks_in_chunk_list = 0; 248 _hwm = 0; 249 _chunk_list = NULL; 250 _free_list = NULL; 251 } 252 253 G1CMRootRegions::G1CMRootRegions() : 254 _cm(NULL), _scan_in_progress(false), 255 _should_abort(false), _claimed_survivor_index(0) { } 256 257 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) { 258 _survivors = survivors; 259 _cm = cm; 260 } 261 262 void G1CMRootRegions::prepare_for_scan() { 263 assert(!scan_in_progress(), "pre-condition"); 264 265 // Currently, only survivors can be root regions. 
266 _claimed_survivor_index = 0; 267 _scan_in_progress = _survivors->regions()->is_nonempty(); 268 _should_abort = false; 269 } 270 271 HeapRegion* G1CMRootRegions::claim_next() { 272 if (_should_abort) { 273 // If someone has set the should_abort flag, we return NULL to 274 // force the caller to bail out of their loop. 275 return NULL; 276 } 277 278 // Currently, only survivors can be root regions. 279 const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions(); 280 281 int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1; 282 if (claimed_index < survivor_regions->length()) { 283 return survivor_regions->at(claimed_index); 284 } 285 return NULL; 286 } 287 288 uint G1CMRootRegions::num_root_regions() const { 289 return (uint)_survivors->regions()->length(); 290 } 291 292 void G1CMRootRegions::notify_scan_done() { 293 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 294 _scan_in_progress = false; 295 RootRegionScan_lock->notify_all(); 296 } 297 298 void G1CMRootRegions::cancel_scan() { 299 notify_scan_done(); 300 } 301 302 void G1CMRootRegions::scan_finished() { 303 assert(scan_in_progress(), "pre-condition"); 304 305 // Currently, only survivors can be root regions. 306 if (!_should_abort) { 307 assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index); 308 assert((uint)_claimed_survivor_index >= _survivors->length(), 309 "we should have claimed all survivors, claimed index = %u, length = %u", 310 (uint)_claimed_survivor_index, _survivors->length()); 311 } 312 313 notify_scan_done(); 314 } 315 316 bool G1CMRootRegions::wait_until_scan_finished() { 317 if (!scan_in_progress()) return false; 318 319 { 320 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 321 while (scan_in_progress()) { 322 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 323 } 324 } 325 return true; 326 } 327 328 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) { 329 return MAX2((n_par_threads + 2) / 4, 1U); 330 } 331 332 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) : 333 _g1h(g1h), 334 _markBitMap1(), 335 _markBitMap2(), 336 _parallel_marking_threads(0), 337 _max_parallel_marking_threads(0), 338 _sleep_factor(0.0), 339 _marking_task_overhead(1.0), 340 _cleanup_list("Cleanup List"), 341 342 _prevMarkBitMap(&_markBitMap1), 343 _nextMarkBitMap(&_markBitMap2), 344 345 _global_mark_stack(), 346 // _finger set in set_non_marking_state 347 348 _max_worker_id(ParallelGCThreads), 349 // _active_tasks set in set_non_marking_state 350 // _tasks set inside the constructor 351 _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)), 352 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)), 353 354 _has_overflown(false), 355 _concurrent(false), 356 _has_aborted(false), 357 _restart_for_overflow(false), 358 _concurrent_marking_in_progress(false), 359 _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()), 360 _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()), 361 362 // _verbose_level set below 363 364 _init_times(), 365 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 366 _cleanup_times(), 367 _total_counting_time(0.0), 368 _total_rs_scrub_time(0.0), 369 370 _parallel_workers(NULL), 371 372 _completed_initialization(false) { 373 374 _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage); 375 
_markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage); 376 377 // Create & start a ConcurrentMark thread. 378 _cmThread = new ConcurrentMarkThread(this); 379 assert(cmThread() != NULL, "CM Thread should have been created"); 380 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 381 if (_cmThread->osthread() == NULL) { 382 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread"); 383 } 384 385 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 386 387 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 388 satb_qs.set_buffer_size(G1SATBBufferSize); 389 390 _root_regions.init(_g1h->survivor(), this); 391 392 if (ConcGCThreads > ParallelGCThreads) { 393 log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).", 394 ConcGCThreads, ParallelGCThreads); 395 return; 396 } 397 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) { 398 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent 399 // if both are set 400 _sleep_factor = 0.0; 401 _marking_task_overhead = 1.0; 402 } else if (G1MarkingOverheadPercent > 0) { 403 // We will calculate the number of parallel marking threads based 404 // on a target overhead with respect to the soft real-time goal 405 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 406 double overall_cm_overhead = 407 (double) MaxGCPauseMillis * marking_overhead / 408 (double) GCPauseIntervalMillis; 409 double cpu_ratio = 1.0 / os::initial_active_processor_count(); 410 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 411 double marking_task_overhead = 412 overall_cm_overhead / marking_thread_num * os::initial_active_processor_count(); 413 double sleep_factor = 414 (1.0 - marking_task_overhead) / marking_task_overhead; 415 416 FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num); 417 _sleep_factor = sleep_factor; 418 _marking_task_overhead = marking_task_overhead; 419 } else { 420 // Calculate the number of parallel marking threads by scaling 421 // the number of parallel GC threads. 422 uint marking_thread_num = scale_parallel_threads(ParallelGCThreads); 423 FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num); 424 _sleep_factor = 0.0; 425 _marking_task_overhead = 1.0; 426 } 427 428 assert(ConcGCThreads > 0, "Should have been set"); 429 log_debug(gc)("ConcGCThreads: %u", ConcGCThreads); 430 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads); 431 _parallel_marking_threads = ConcGCThreads; 432 _max_parallel_marking_threads = _parallel_marking_threads; 433 434 _parallel_workers = new WorkGang("G1 Marker", 435 _max_parallel_marking_threads, false, true); 436 if (_parallel_workers == NULL) { 437 vm_exit_during_initialization("Failed necessary allocation."); 438 } else { 439 _parallel_workers->initialize_workers(); 440 } 441 442 if (FLAG_IS_DEFAULT(MarkStackSize)) { 443 size_t mark_stack_size = 444 MIN2(MarkStackSizeMax, 445 MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE))); 446 // Verify that the calculated value for MarkStackSize is in range. 447 // It would be nice to use the private utility routine from Arguments. 448 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { 449 log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): " 450 "must be between 1 and " SIZE_FORMAT, 451 mark_stack_size, MarkStackSizeMax); 452 return; 453 } 454 FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size); 455 } else { 456 // Verify MarkStackSize is in range. 
457 if (FLAG_IS_CMDLINE(MarkStackSize)) { 458 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { 459 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 460 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): " 461 "must be between 1 and " SIZE_FORMAT, 462 MarkStackSize, MarkStackSizeMax); 463 return; 464 } 465 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) { 466 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 467 log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")" 468 " or for MarkStackSizeMax (" SIZE_FORMAT ")", 469 MarkStackSize, MarkStackSizeMax); 470 return; 471 } 472 } 473 } 474 } 475 476 if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) { 477 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack."); 478 } 479 480 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC); 481 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); 482 483 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 484 _active_tasks = _max_worker_id; 485 486 for (uint i = 0; i < _max_worker_id; ++i) { 487 G1CMTaskQueue* task_queue = new G1CMTaskQueue(); 488 task_queue->initialize(); 489 _task_queues->register_queue(i, task_queue); 490 491 _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues); 492 493 _accum_task_vtime[i] = 0.0; 494 } 495 496 // so that the call below can read a sensible value 497 _heap_start = g1h->reserved_region().start(); 498 set_non_marking_state(); 499 _completed_initialization = true; 500 } 501 502 void G1ConcurrentMark::reset() { 503 // Starting values for these two. This should be called in a STW 504 // phase. 505 MemRegion reserved = _g1h->g1_reserved(); 506 _heap_start = reserved.start(); 507 _heap_end = reserved.end(); 508 509 // Separated the asserts so that we know which one fires. 510 assert(_heap_start != NULL, "heap bounds should look ok"); 511 assert(_heap_end != NULL, "heap bounds should look ok"); 512 assert(_heap_start < _heap_end, "heap bounds should look ok"); 513 514 // Reset all the marking data structures and any necessary flags 515 reset_marking_state(); 516 517 // We do reset all of them, since different phases will use 518 // different number of active threads. So, it's easiest to have all 519 // of them ready. 520 for (uint i = 0; i < _max_worker_id; ++i) { 521 _tasks[i]->reset(_nextMarkBitMap); 522 } 523 524 // we need this to make sure that the flag is on during the evac 525 // pause with initial mark piggy-backed 526 set_concurrent_marking_in_progress(); 527 } 528 529 530 void G1ConcurrentMark::reset_marking_state() { 531 _global_mark_stack.set_empty(); 532 533 // Expand the marking stack, if we have to and if we can. 534 if (has_overflown()) { 535 _global_mark_stack.expand(); 536 } 537 538 clear_has_overflown(); 539 _finger = _heap_start; 540 541 for (uint i = 0; i < _max_worker_id; ++i) { 542 G1CMTaskQueue* queue = _task_queues->queue(i); 543 queue->set_empty(); 544 } 545 } 546 547 void G1ConcurrentMark::set_concurrency(uint active_tasks) { 548 assert(active_tasks <= _max_worker_id, "we should not have more"); 549 550 _active_tasks = active_tasks; 551 // Need to update the three data structures below according to the 552 // number of active threads for this phase. 
553 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 554 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 555 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 556 } 557 558 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) { 559 set_concurrency(active_tasks); 560 561 _concurrent = concurrent; 562 // We propagate this to all tasks, not just the active ones. 563 for (uint i = 0; i < _max_worker_id; ++i) 564 _tasks[i]->set_concurrent(concurrent); 565 566 if (concurrent) { 567 set_concurrent_marking_in_progress(); 568 } else { 569 // We currently assume that the concurrent flag has been set to 570 // false before we start remark. At this point we should also be 571 // in a STW phase. 572 assert(!concurrent_marking_in_progress(), "invariant"); 573 assert(out_of_regions(), 574 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT, 575 p2i(_finger), p2i(_heap_end)); 576 } 577 } 578 579 void G1ConcurrentMark::set_non_marking_state() { 580 // We set the global marking state to some default values when we're 581 // not doing marking. 582 reset_marking_state(); 583 _active_tasks = 0; 584 clear_concurrent_marking_in_progress(); 585 } 586 587 G1ConcurrentMark::~G1ConcurrentMark() { 588 // The G1ConcurrentMark instance is never freed. 589 ShouldNotReachHere(); 590 } 591 592 class G1ClearBitMapTask : public AbstractGangTask { 593 public: 594 static size_t chunk_size() { return M; } 595 596 private: 597 // Heap region closure used for clearing the given mark bitmap. 598 class G1ClearBitmapHRClosure : public HeapRegionClosure { 599 private: 600 G1CMBitMap* _bitmap; 601 G1ConcurrentMark* _cm; 602 public: 603 G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) { 604 } 605 606 virtual bool doHeapRegion(HeapRegion* r) { 607 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize; 608 609 HeapWord* cur = r->bottom(); 610 HeapWord* const end = r->end(); 611 612 while (cur < end) { 613 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end)); 614 _bitmap->clear_range(mr); 615 616 cur += chunk_size_in_words; 617 618 // Abort iteration if after yielding the marking has been aborted. 619 if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) { 620 return true; 621 } 622 // Repeat the asserts from before the start of the closure. We will do them 623 // as asserts here to minimize their overhead on the product. However, we 624 // will have them as guarantees at the beginning / end of the bitmap 625 // clearing to get some checking in the product. 626 assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant"); 627 assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant"); 628 } 629 assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index()); 630 631 return false; 632 } 633 }; 634 635 G1ClearBitmapHRClosure _cl; 636 HeapRegionClaimer _hr_claimer; 637 bool _suspendible; // If the task is suspendible, workers must join the STS. 638 639 public: 640 G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) : 641 AbstractGangTask("G1 Clear Bitmap"), 642 _cl(bitmap, suspendible ? 
cm : NULL), 643 _hr_claimer(n_workers), 644 _suspendible(suspendible) 645 { } 646 647 void work(uint worker_id) { 648 SuspendibleThreadSetJoiner sts_join(_suspendible); 649 G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer); 650 } 651 652 bool is_complete() { 653 return _cl.complete(); 654 } 655 }; 656 657 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) { 658 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint."); 659 660 size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor(); 661 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size(); 662 663 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers()); 664 665 G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield); 666 667 log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks); 668 workers->run_task(&cl, num_workers); 669 guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding."); 670 } 671 672 void G1ConcurrentMark::cleanup_for_next_mark() { 673 // Make sure that the concurrent mark thread looks to still be in 674 // the current cycle. 675 guarantee(cmThread()->during_cycle(), "invariant"); 676 677 // We are finishing up the current cycle by clearing the next 678 // marking bitmap and getting it ready for the next cycle. During 679 // this time no other cycle can start. So, let's make sure that this 680 // is the case. 681 guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant"); 682 683 clear_bitmap(_nextMarkBitMap, _parallel_workers, true); 684 685 // Clear the live count data. If the marking has been aborted, the abort() 686 // call already did that. 687 if (!has_aborted()) { 688 clear_live_data(_parallel_workers); 689 DEBUG_ONLY(verify_live_data_clear()); 690 } 691 692 // Repeat the asserts from above. 693 guarantee(cmThread()->during_cycle(), "invariant"); 694 guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant"); 695 } 696 697 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) { 698 assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint."); 699 clear_bitmap(_prevMarkBitMap, workers, false); 700 } 701 702 class CheckBitmapClearHRClosure : public HeapRegionClosure { 703 G1CMBitMap* _bitmap; 704 bool _error; 705 public: 706 CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) { 707 } 708 709 virtual bool doHeapRegion(HeapRegion* r) { 710 // This closure can be called concurrently to the mutator, so we must make sure 711 // that the result of the getNextMarkedWordAddress() call is compared to the 712 // value passed to it as limit to detect any found bits. 713 // end never changes in G1. 
714 HeapWord* end = r->end(); 715 return _bitmap->get_next_marked_addr(r->bottom(), end) != end; 716 } 717 }; 718 719 bool G1ConcurrentMark::nextMarkBitmapIsClear() { 720 CheckBitmapClearHRClosure cl(_nextMarkBitMap); 721 _g1h->heap_region_iterate(&cl); 722 return cl.complete(); 723 } 724 725 class NoteStartOfMarkHRClosure: public HeapRegionClosure { 726 public: 727 bool doHeapRegion(HeapRegion* r) { 728 r->note_start_of_marking(); 729 return false; 730 } 731 }; 732 733 void G1ConcurrentMark::checkpointRootsInitialPre() { 734 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 735 736 _has_aborted = false; 737 738 // Initialize marking structures. This has to be done in a STW phase. 739 reset(); 740 741 // For each region note start of marking. 742 NoteStartOfMarkHRClosure startcl; 743 g1h->heap_region_iterate(&startcl); 744 } 745 746 747 void G1ConcurrentMark::checkpointRootsInitialPost() { 748 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 749 750 // Start Concurrent Marking weak-reference discovery. 751 ReferenceProcessor* rp = g1h->ref_processor_cm(); 752 // enable ("weak") refs discovery 753 rp->enable_discovery(); 754 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 755 756 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 757 // This is the start of the marking cycle, we're expected all 758 // threads to have SATB queues with active set to false. 759 satb_mq_set.set_active_all_threads(true, /* new active value */ 760 false /* expected_active */); 761 762 _root_regions.prepare_for_scan(); 763 764 // update_g1_committed() will be called at the end of an evac pause 765 // when marking is on. So, it's also called at the end of the 766 // initial-mark pause to update the heap end, if the heap expands 767 // during it. No need to call it here. 768 } 769 770 /* 771 * Notice that in the next two methods, we actually leave the STS 772 * during the barrier sync and join it immediately afterwards. If we 773 * do not do this, the following deadlock can occur: one thread could 774 * be in the barrier sync code, waiting for the other thread to also 775 * sync up, whereas another one could be trying to yield, while also 776 * waiting for the other threads to sync up too. 777 * 778 * Note, however, that this code is also used during remark and in 779 * this case we should not attempt to leave / enter the STS, otherwise 780 * we'll either hit an assert (debug / fastdebug) or deadlock 781 * (product). So we should only leave / enter the STS if we are 782 * operating concurrently. 783 * 784 * Because the thread that does the sync barrier has left the STS, it 785 * is possible to be suspended for a Full GC or an evacuation pause 786 * could occur. This is actually safe, since the entering the sync 787 * barrier is one of the last things do_marking_step() does, and it 788 * doesn't manipulate any data structures afterwards. 789 */ 790 791 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 792 bool barrier_aborted; 793 { 794 SuspendibleThreadSetLeaver sts_leave(concurrent()); 795 barrier_aborted = !_first_overflow_barrier_sync.enter(); 796 } 797 798 // at this point everyone should have synced up and not be doing any 799 // more work 800 801 if (barrier_aborted) { 802 // If the barrier aborted we ignore the overflow condition and 803 // just abort the whole marking phase as quickly as possible. 
804 return; 805 } 806 807 // If we're executing the concurrent phase of marking, reset the marking 808 // state; otherwise the marking state is reset after reference processing, 809 // during the remark pause. 810 // If we reset here as a result of an overflow during the remark we will 811 // see assertion failures from any subsequent set_concurrency_and_phase() 812 // calls. 813 if (concurrent()) { 814 // let the task associated with with worker 0 do this 815 if (worker_id == 0) { 816 // task 0 is responsible for clearing the global data structures 817 // We should be here because of an overflow. During STW we should 818 // not clear the overflow flag since we rely on it being true when 819 // we exit this method to abort the pause and restart concurrent 820 // marking. 821 reset_marking_state(); 822 823 log_info(gc, marking)("Concurrent Mark reset for overflow"); 824 } 825 } 826 827 // after this, each task should reset its own data structures then 828 // then go into the second barrier 829 } 830 831 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) { 832 SuspendibleThreadSetLeaver sts_leave(concurrent()); 833 _second_overflow_barrier_sync.enter(); 834 835 // at this point everything should be re-initialized and ready to go 836 } 837 838 class G1CMConcurrentMarkingTask: public AbstractGangTask { 839 private: 840 G1ConcurrentMark* _cm; 841 ConcurrentMarkThread* _cmt; 842 843 public: 844 void work(uint worker_id) { 845 assert(Thread::current()->is_ConcurrentGC_thread(), 846 "this should only be done by a conc GC thread"); 847 ResourceMark rm; 848 849 double start_vtime = os::elapsedVTime(); 850 851 { 852 SuspendibleThreadSetJoiner sts_join; 853 854 assert(worker_id < _cm->active_tasks(), "invariant"); 855 G1CMTask* the_task = _cm->task(worker_id); 856 the_task->record_start_time(); 857 if (!_cm->has_aborted()) { 858 do { 859 double start_vtime_sec = os::elapsedVTime(); 860 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 861 862 the_task->do_marking_step(mark_step_duration_ms, 863 true /* do_termination */, 864 false /* is_serial*/); 865 866 double end_vtime_sec = os::elapsedVTime(); 867 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 868 _cm->do_yield_check(); 869 870 jlong sleep_time_ms; 871 if (!_cm->has_aborted() && the_task->has_aborted()) { 872 sleep_time_ms = 873 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 874 { 875 SuspendibleThreadSetLeaver sts_leave; 876 os::sleep(Thread::current(), sleep_time_ms, false); 877 } 878 } 879 } while (!_cm->has_aborted() && the_task->has_aborted()); 880 } 881 the_task->record_end_time(); 882 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 883 } 884 885 double end_vtime = os::elapsedVTime(); 886 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime); 887 } 888 889 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm, 890 ConcurrentMarkThread* cmt) : 891 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 892 893 ~G1CMConcurrentMarkingTask() { } 894 }; 895 896 // Calculates the number of active workers for a concurrent 897 // phase. 
898 uint G1ConcurrentMark::calc_parallel_marking_threads() { 899 uint n_conc_workers = 0; 900 if (!UseDynamicNumberOfGCThreads || 901 (!FLAG_IS_DEFAULT(ConcGCThreads) && 902 !ForceDynamicNumberOfGCThreads)) { 903 n_conc_workers = max_parallel_marking_threads(); 904 } else { 905 n_conc_workers = 906 AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(), 907 1, /* Minimum workers */ 908 parallel_marking_threads(), 909 Threads::number_of_non_daemon_threads()); 910 // Don't scale down "n_conc_workers" by scale_parallel_threads() because 911 // that scaling has already gone into "_max_parallel_marking_threads". 912 } 913 assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(), 914 "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u", 915 max_parallel_marking_threads(), n_conc_workers); 916 return n_conc_workers; 917 } 918 919 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) { 920 // Currently, only survivors can be root regions. 921 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 922 G1RootRegionScanClosure cl(_g1h, this); 923 924 const uintx interval = PrefetchScanIntervalInBytes; 925 HeapWord* curr = hr->bottom(); 926 const HeapWord* end = hr->top(); 927 while (curr < end) { 928 Prefetch::read(curr, interval); 929 oop obj = oop(curr); 930 int size = obj->oop_iterate_size(&cl); 931 assert(size == obj->size(), "sanity"); 932 curr += size; 933 } 934 } 935 936 class G1CMRootRegionScanTask : public AbstractGangTask { 937 private: 938 G1ConcurrentMark* _cm; 939 940 public: 941 G1CMRootRegionScanTask(G1ConcurrentMark* cm) : 942 AbstractGangTask("G1 Root Region Scan"), _cm(cm) { } 943 944 void work(uint worker_id) { 945 assert(Thread::current()->is_ConcurrentGC_thread(), 946 "this should only be done by a conc GC thread"); 947 948 G1CMRootRegions* root_regions = _cm->root_regions(); 949 HeapRegion* hr = root_regions->claim_next(); 950 while (hr != NULL) { 951 _cm->scanRootRegion(hr); 952 hr = root_regions->claim_next(); 953 } 954 } 955 }; 956 957 void G1ConcurrentMark::scan_root_regions() { 958 // scan_in_progress() will have been set to true only if there was 959 // at least one root region to scan. So, if it's false, we 960 // should not attempt to do any further work. 961 if (root_regions()->scan_in_progress()) { 962 assert(!has_aborted(), "Aborting before root region scanning is finished not supported."); 963 964 _parallel_marking_threads = MIN2(calc_parallel_marking_threads(), 965 // We distribute work on a per-region basis, so starting 966 // more threads than that is useless. 967 root_regions()->num_root_regions()); 968 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 969 "Maximum number of marking threads exceeded"); 970 971 G1CMRootRegionScanTask task(this); 972 log_debug(gc, ergo)("Running %s using %u workers for %u work units.", 973 task.name(), _parallel_marking_threads, root_regions()->num_root_regions()); 974 _parallel_workers->run_task(&task, _parallel_marking_threads); 975 976 // It's possible that has_aborted() is true here without actually 977 // aborting the survivor scan earlier. This is OK as it's 978 // mainly used for sanity checking. 
979 root_regions()->scan_finished(); 980 } 981 } 982 983 void G1ConcurrentMark::concurrent_cycle_start() { 984 _gc_timer_cm->register_gc_start(); 985 986 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start()); 987 988 _g1h->trace_heap_before_gc(_gc_tracer_cm); 989 } 990 991 void G1ConcurrentMark::concurrent_cycle_end() { 992 _g1h->trace_heap_after_gc(_gc_tracer_cm); 993 994 if (has_aborted()) { 995 _gc_tracer_cm->report_concurrent_mode_failure(); 996 } 997 998 _gc_timer_cm->register_gc_end(); 999 1000 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions()); 1001 } 1002 1003 void G1ConcurrentMark::mark_from_roots() { 1004 // we might be tempted to assert that: 1005 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1006 // "inconsistent argument?"); 1007 // However that wouldn't be right, because it's possible that 1008 // a safepoint is indeed in progress as a younger generation 1009 // stop-the-world GC happens even as we mark in this generation. 1010 1011 _restart_for_overflow = false; 1012 1013 // _g1h has _n_par_threads 1014 _parallel_marking_threads = calc_parallel_marking_threads(); 1015 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 1016 "Maximum number of marking threads exceeded"); 1017 1018 uint active_workers = MAX2(1U, parallel_marking_threads()); 1019 assert(active_workers > 0, "Should have been set"); 1020 1021 // Setting active workers is not guaranteed since fewer 1022 // worker threads may currently exist and more may not be 1023 // available. 1024 active_workers = _parallel_workers->update_active_workers(active_workers); 1025 log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers()); 1026 1027 // Parallel task terminator is set in "set_concurrency_and_phase()" 1028 set_concurrency_and_phase(active_workers, true /* concurrent */); 1029 1030 G1CMConcurrentMarkingTask markingTask(this, cmThread()); 1031 _parallel_workers->run_task(&markingTask); 1032 print_stats(); 1033 } 1034 1035 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1036 // world is stopped at this checkpoint 1037 assert(SafepointSynchronize::is_at_safepoint(), 1038 "world should be stopped"); 1039 1040 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1041 1042 // If a full collection has happened, we shouldn't do this. 1043 if (has_aborted()) { 1044 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1045 return; 1046 } 1047 1048 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1049 1050 if (VerifyDuringGC) { 1051 HandleMark hm; // handle scope 1052 g1h->prepare_for_verify(); 1053 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1054 } 1055 g1h->verifier()->check_bitmaps("Remark Start"); 1056 1057 G1Policy* g1p = g1h->g1_policy(); 1058 g1p->record_concurrent_mark_remark_start(); 1059 1060 double start = os::elapsedTime(); 1061 1062 checkpointRootsFinalWork(); 1063 1064 double mark_work_end = os::elapsedTime(); 1065 1066 weakRefsWork(clear_all_soft_refs); 1067 1068 if (has_overflown()) { 1069 // We overflowed. Restart concurrent marking. 1070 _restart_for_overflow = true; 1071 1072 // Verify the heap w.r.t. the previous marking bitmap. 
1073 if (VerifyDuringGC) { 1074 HandleMark hm; // handle scope 1075 g1h->prepare_for_verify(); 1076 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)"); 1077 } 1078 1079 // Clear the marking state because we will be restarting 1080 // marking due to overflowing the global mark stack. 1081 reset_marking_state(); 1082 } else { 1083 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1084 // We're done with marking. 1085 // This is the end of the marking cycle, we're expected all 1086 // threads to have SATB queues with active set to true. 1087 satb_mq_set.set_active_all_threads(false, /* new active value */ 1088 true /* expected_active */); 1089 1090 if (VerifyDuringGC) { 1091 HandleMark hm; // handle scope 1092 g1h->prepare_for_verify(); 1093 Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)"); 1094 } 1095 g1h->verifier()->check_bitmaps("Remark End"); 1096 assert(!restart_for_overflow(), "sanity"); 1097 // Completely reset the marking state since marking completed 1098 set_non_marking_state(); 1099 } 1100 1101 // Statistics 1102 double now = os::elapsedTime(); 1103 _remark_mark_times.add((mark_work_end - start) * 1000.0); 1104 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1105 _remark_times.add((now - start) * 1000.0); 1106 1107 g1p->record_concurrent_mark_remark_end(); 1108 1109 G1CMIsAliveClosure is_alive(g1h); 1110 _gc_tracer_cm->report_object_count_after_gc(&is_alive); 1111 } 1112 1113 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1114 G1CollectedHeap* _g1; 1115 size_t _freed_bytes; 1116 FreeRegionList* _local_cleanup_list; 1117 uint _old_regions_removed; 1118 uint _humongous_regions_removed; 1119 HRRSCleanupTask* _hrrs_cleanup_task; 1120 1121 public: 1122 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1123 FreeRegionList* local_cleanup_list, 1124 HRRSCleanupTask* hrrs_cleanup_task) : 1125 _g1(g1), 1126 _freed_bytes(0), 1127 _local_cleanup_list(local_cleanup_list), 1128 _old_regions_removed(0), 1129 _humongous_regions_removed(0), 1130 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1131 1132 size_t freed_bytes() { return _freed_bytes; } 1133 const uint old_regions_removed() { return _old_regions_removed; } 1134 const uint humongous_regions_removed() { return _humongous_regions_removed; } 1135 1136 bool doHeapRegion(HeapRegion *hr) { 1137 _g1->reset_gc_time_stamps(hr); 1138 hr->note_end_of_marking(); 1139 1140 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) { 1141 _freed_bytes += hr->used(); 1142 hr->set_containing_set(NULL); 1143 if (hr->is_humongous()) { 1144 _humongous_regions_removed++; 1145 _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */); 1146 } else { 1147 _old_regions_removed++; 1148 _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */); 1149 } 1150 } else { 1151 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1152 } 1153 1154 return false; 1155 } 1156 }; 1157 1158 class G1ParNoteEndTask: public AbstractGangTask { 1159 friend class G1NoteEndOfConcMarkClosure; 1160 1161 protected: 1162 G1CollectedHeap* _g1h; 1163 FreeRegionList* _cleanup_list; 1164 HeapRegionClaimer _hrclaimer; 1165 1166 public: 1167 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1168 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1169 } 1170 1171 void work(uint worker_id) { 1172 FreeRegionList local_cleanup_list("Local Cleanup List"); 1173 HRRSCleanupTask 
hrrs_cleanup_task; 1174 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1175 &hrrs_cleanup_task); 1176 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1177 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1178 1179 // Now update the lists 1180 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1181 { 1182 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1183 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1184 1185 // If we iterate over the global cleanup list at the end of 1186 // cleanup to do this printing we will not guarantee to only 1187 // generate output for the newly-reclaimed regions (the list 1188 // might not be empty at the beginning of cleanup; we might 1189 // still be working on its previous contents). So we do the 1190 // printing here, before we append the new regions to the global 1191 // cleanup list. 1192 1193 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1194 if (hr_printer->is_active()) { 1195 FreeRegionListIterator iter(&local_cleanup_list); 1196 while (iter.more_available()) { 1197 HeapRegion* hr = iter.get_next(); 1198 hr_printer->cleanup(hr); 1199 } 1200 } 1201 1202 _cleanup_list->add_ordered(&local_cleanup_list); 1203 assert(local_cleanup_list.is_empty(), "post-condition"); 1204 1205 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1206 } 1207 } 1208 }; 1209 1210 void G1ConcurrentMark::cleanup() { 1211 // world is stopped at this checkpoint 1212 assert(SafepointSynchronize::is_at_safepoint(), 1213 "world should be stopped"); 1214 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1215 1216 // If a full collection has happened, we shouldn't do this. 1217 if (has_aborted()) { 1218 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1219 return; 1220 } 1221 1222 g1h->verifier()->verify_region_sets_optional(); 1223 1224 if (VerifyDuringGC) { 1225 HandleMark hm; // handle scope 1226 g1h->prepare_for_verify(); 1227 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1228 } 1229 g1h->verifier()->check_bitmaps("Cleanup Start"); 1230 1231 G1Policy* g1p = g1h->g1_policy(); 1232 g1p->record_concurrent_mark_cleanup_start(); 1233 1234 double start = os::elapsedTime(); 1235 1236 HeapRegionRemSet::reset_for_cleanup_tasks(); 1237 1238 { 1239 GCTraceTime(Debug, gc)("Finalize Live Data"); 1240 finalize_live_data(); 1241 } 1242 1243 if (VerifyDuringGC) { 1244 GCTraceTime(Debug, gc)("Verify Live Data"); 1245 verify_live_data(); 1246 } 1247 1248 g1h->collector_state()->set_mark_in_progress(false); 1249 1250 double count_end = os::elapsedTime(); 1251 double this_final_counting_time = (count_end - start); 1252 _total_counting_time += this_final_counting_time; 1253 1254 if (log_is_enabled(Trace, gc, liveness)) { 1255 G1PrintRegionLivenessInfoClosure cl("Post-Marking"); 1256 _g1h->heap_region_iterate(&cl); 1257 } 1258 1259 // Install newly created mark bitMap as "prev". 1260 swapMarkBitMaps(); 1261 1262 g1h->reset_gc_time_stamp(); 1263 1264 uint n_workers = _g1h->workers()->active_workers(); 1265 1266 // Note end of marking in all heap regions. 1267 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); 1268 g1h->workers()->run_task(&g1_par_note_end_task); 1269 g1h->check_gc_time_stamps(); 1270 1271 if (!cleanup_list_is_empty()) { 1272 // The cleanup list is not empty, so we'll have to process it 1273 // concurrently. 
Notify anyone else that might be wanting free 1274 // regions that there will be more free regions coming soon. 1275 g1h->set_free_regions_coming(); 1276 } 1277 1278 // call below, since it affects the metric by which we sort the heap 1279 // regions. 1280 if (G1ScrubRemSets) { 1281 double rs_scrub_start = os::elapsedTime(); 1282 g1h->scrub_rem_set(); 1283 _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start); 1284 } 1285 1286 // this will also free any regions totally full of garbage objects, 1287 // and sort the regions. 1288 g1h->g1_policy()->record_concurrent_mark_cleanup_end(); 1289 1290 // Statistics. 1291 double end = os::elapsedTime(); 1292 _cleanup_times.add((end - start) * 1000.0); 1293 1294 // Clean up will have freed any regions completely full of garbage. 1295 // Update the soft reference policy with the new heap occupancy. 1296 Universe::update_heap_info_at_gc(); 1297 1298 if (VerifyDuringGC) { 1299 HandleMark hm; // handle scope 1300 g1h->prepare_for_verify(); 1301 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)"); 1302 } 1303 1304 g1h->verifier()->check_bitmaps("Cleanup End"); 1305 1306 g1h->verifier()->verify_region_sets_optional(); 1307 1308 // We need to make this be a "collection" so any collection pause that 1309 // races with it goes around and waits for completeCleanup to finish. 1310 g1h->increment_total_collections(); 1311 1312 // Clean out dead classes and update Metaspace sizes. 1313 if (ClassUnloadingWithConcurrentMark) { 1314 ClassLoaderDataGraph::purge(); 1315 } 1316 MetaspaceGC::compute_new_size(); 1317 1318 // We reclaimed old regions so we should calculate the sizes to make 1319 // sure we update the old gen/space data. 1320 g1h->g1mm()->update_sizes(); 1321 g1h->allocation_context_stats().update_after_mark(); 1322 } 1323 1324 void G1ConcurrentMark::complete_cleanup() { 1325 if (has_aborted()) return; 1326 1327 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1328 1329 _cleanup_list.verify_optional(); 1330 FreeRegionList tmp_free_list("Tmp Free List"); 1331 1332 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : " 1333 "cleanup list has %u entries", 1334 _cleanup_list.length()); 1335 1336 // No one else should be accessing the _cleanup_list at this point, 1337 // so it is not necessary to take any locks 1338 while (!_cleanup_list.is_empty()) { 1339 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 1340 assert(hr != NULL, "Got NULL from a non-empty list"); 1341 hr->par_clear(); 1342 tmp_free_list.add_ordered(hr); 1343 1344 // Instead of adding one region at a time to the secondary_free_list, 1345 // we accumulate them in the local list and move them a few at a 1346 // time. This also cuts down on the number of notify_all() calls 1347 // we do during this process. We'll also append the local list when 1348 // _cleanup_list is empty (which means we just removed the last 1349 // region from the _cleanup_list). 
1350 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 1351 _cleanup_list.is_empty()) { 1352 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : " 1353 "appending %u entries to the secondary_free_list, " 1354 "cleanup list still has %u entries", 1355 tmp_free_list.length(), 1356 _cleanup_list.length()); 1357 1358 { 1359 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 1360 g1h->secondary_free_list_add(&tmp_free_list); 1361 SecondaryFreeList_lock->notify_all(); 1362 } 1363 #ifndef PRODUCT 1364 if (G1StressConcRegionFreeing) { 1365 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 1366 os::sleep(Thread::current(), (jlong) 1, false); 1367 } 1368 } 1369 #endif 1370 } 1371 } 1372 assert(tmp_free_list.is_empty(), "post-condition"); 1373 } 1374 1375 // Supporting Object and Oop closures for reference discovery 1376 // and processing in during marking 1377 1378 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1379 HeapWord* addr = (HeapWord*)obj; 1380 return addr != NULL && 1381 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 1382 } 1383 1384 // 'Keep Alive' oop closure used by both serial parallel reference processing. 1385 // Uses the G1CMTask associated with a worker thread (for serial reference 1386 // processing the G1CMTask for worker 0 is used) to preserve (mark) and 1387 // trace referent objects. 1388 // 1389 // Using the G1CMTask and embedded local queues avoids having the worker 1390 // threads operating on the global mark stack. This reduces the risk 1391 // of overflowing the stack - which we would rather avoid at this late 1392 // state. Also using the tasks' local queues removes the potential 1393 // of the workers interfering with each other that could occur if 1394 // operating on the global stack. 1395 1396 class G1CMKeepAliveAndDrainClosure: public OopClosure { 1397 G1ConcurrentMark* _cm; 1398 G1CMTask* _task; 1399 int _ref_counter_limit; 1400 int _ref_counter; 1401 bool _is_serial; 1402 public: 1403 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) : 1404 _cm(cm), _task(task), _is_serial(is_serial), 1405 _ref_counter_limit(G1RefProcDrainInterval) { 1406 assert(_ref_counter_limit > 0, "sanity"); 1407 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1408 _ref_counter = _ref_counter_limit; 1409 } 1410 1411 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1412 virtual void do_oop( oop* p) { do_oop_work(p); } 1413 1414 template <class T> void do_oop_work(T* p) { 1415 if (!_cm->has_overflown()) { 1416 oop obj = oopDesc::load_decode_heap_oop(p); 1417 _task->deal_with_reference(obj); 1418 _ref_counter--; 1419 1420 if (_ref_counter == 0) { 1421 // We have dealt with _ref_counter_limit references, pushing them 1422 // and objects reachable from them on to the local stack (and 1423 // possibly the global stack). Call G1CMTask::do_marking_step() to 1424 // process these entries. 1425 // 1426 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if 1427 // there's nothing more to do (i.e. we're done with the entries that 1428 // were pushed as a result of the G1CMTask::deal_with_reference() calls 1429 // above) or we overflow. 1430 // 1431 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted() 1432 // flag while there may still be some work to do. (See the comment at 1433 // the beginning of G1CMTask::do_marking_step() for those conditions - 1434 // one of which is reaching the specified time target.) 
It is only 1435 // when G1CMTask::do_marking_step() returns without setting the 1436 // has_aborted() flag that the marking step has completed. 1437 do { 1438 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1439 _task->do_marking_step(mark_step_duration_ms, 1440 false /* do_termination */, 1441 _is_serial); 1442 } while (_task->has_aborted() && !_cm->has_overflown()); 1443 _ref_counter = _ref_counter_limit; 1444 } 1445 } 1446 } 1447 }; 1448 1449 // 'Drain' oop closure used by both serial and parallel reference processing. 1450 // Uses the G1CMTask associated with a given worker thread (for serial 1451 // reference processing the G1CMtask for worker 0 is used). Calls the 1452 // do_marking_step routine, with an unbelievably large timeout value, 1453 // to drain the marking data structures of the remaining entries 1454 // added by the 'keep alive' oop closure above. 1455 1456 class G1CMDrainMarkingStackClosure: public VoidClosure { 1457 G1ConcurrentMark* _cm; 1458 G1CMTask* _task; 1459 bool _is_serial; 1460 public: 1461 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) : 1462 _cm(cm), _task(task), _is_serial(is_serial) { 1463 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1464 } 1465 1466 void do_void() { 1467 do { 1468 // We call G1CMTask::do_marking_step() to completely drain the local 1469 // and global marking stacks of entries pushed by the 'keep alive' 1470 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 1471 // 1472 // G1CMTask::do_marking_step() is called in a loop, which we'll exit 1473 // if there's nothing more to do (i.e. we've completely drained the 1474 // entries that were pushed as a a result of applying the 'keep alive' 1475 // closure to the entries on the discovered ref lists) or we overflow 1476 // the global marking stack. 1477 // 1478 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted() 1479 // flag while there may still be some work to do. (See the comment at 1480 // the beginning of G1CMTask::do_marking_step() for those conditions - 1481 // one of which is reaching the specified time target.) It is only 1482 // when G1CMTask::do_marking_step() returns without setting the 1483 // has_aborted() flag that the marking step has completed. 1484 1485 _task->do_marking_step(1000000000.0 /* something very large */, 1486 true /* do_termination */, 1487 _is_serial); 1488 } while (_task->has_aborted() && !_cm->has_overflown()); 1489 } 1490 }; 1491 1492 // Implementation of AbstractRefProcTaskExecutor for parallel 1493 // reference processing at the end of G1 concurrent marking 1494 1495 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 1496 private: 1497 G1CollectedHeap* _g1h; 1498 G1ConcurrentMark* _cm; 1499 WorkGang* _workers; 1500 uint _active_workers; 1501 1502 public: 1503 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 1504 G1ConcurrentMark* cm, 1505 WorkGang* workers, 1506 uint n_workers) : 1507 _g1h(g1h), _cm(cm), 1508 _workers(workers), _active_workers(n_workers) { } 1509 1510 // Executes the given task using concurrent marking worker threads. 
1511 virtual void execute(ProcessTask& task); 1512 virtual void execute(EnqueueTask& task); 1513 }; 1514 1515 class G1CMRefProcTaskProxy: public AbstractGangTask { 1516 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1517 ProcessTask& _proc_task; 1518 G1CollectedHeap* _g1h; 1519 G1ConcurrentMark* _cm; 1520 1521 public: 1522 G1CMRefProcTaskProxy(ProcessTask& proc_task, 1523 G1CollectedHeap* g1h, 1524 G1ConcurrentMark* cm) : 1525 AbstractGangTask("Process reference objects in parallel"), 1526 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 1527 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1528 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 1529 } 1530 1531 virtual void work(uint worker_id) { 1532 ResourceMark rm; 1533 HandleMark hm; 1534 G1CMTask* task = _cm->task(worker_id); 1535 G1CMIsAliveClosure g1_is_alive(_g1h); 1536 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 1537 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 1538 1539 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1540 } 1541 }; 1542 1543 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 1544 assert(_workers != NULL, "Need parallel worker threads."); 1545 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1546 1547 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 1548 1549 // We need to reset the concurrency level before each 1550 // proxy task execution, so that the termination protocol 1551 // and overflow handling in G1CMTask::do_marking_step() knows 1552 // how many workers to wait for. 1553 _cm->set_concurrency(_active_workers); 1554 _workers->run_task(&proc_task_proxy); 1555 } 1556 1557 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 1558 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1559 EnqueueTask& _enq_task; 1560 1561 public: 1562 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 1563 AbstractGangTask("Enqueue reference objects in parallel"), 1564 _enq_task(enq_task) { } 1565 1566 virtual void work(uint worker_id) { 1567 _enq_task.work(worker_id); 1568 } 1569 }; 1570 1571 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 1572 assert(_workers != NULL, "Need parallel worker threads."); 1573 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1574 1575 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 1576 1577 // Not strictly necessary but... 1578 // 1579 // We need to reset the concurrency level before each 1580 // proxy task execution, so that the termination protocol 1581 // and overflow handling in G1CMTask::do_marking_step() knows 1582 // how many workers to wait for. 1583 _cm->set_concurrency(_active_workers); 1584 _workers->run_task(&enq_task_proxy); 1585 } 1586 1587 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 1588 if (has_overflown()) { 1589 // Skip processing the discovered references if we have 1590 // overflown the global marking stack. Reference objects 1591 // only get discovered once so it is OK to not 1592 // de-populate the discovered reference lists. We could have, 1593 // but the only benefit would be that, when marking restarts, 1594 // less reference objects are discovered. 1595 return; 1596 } 1597 1598 ResourceMark rm; 1599 HandleMark hm; 1600 1601 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1602 1603 // Is alive closure. 
1604 G1CMIsAliveClosure g1_is_alive(g1h);
1605
1606 // Inner scope to exclude the cleaning of the string and symbol
1607 // tables from the displayed time.
1608 {
1609 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1610
1611 ReferenceProcessor* rp = g1h->ref_processor_cm();
1612
1613 // See the comment in G1CollectedHeap::ref_processing_init()
1614 // about how reference processing currently works in G1.
1615
1616 // Set the soft reference policy
1617 rp->setup_policy(clear_all_soft_refs);
1618 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1619
1620 // Instances of the 'Keep Alive' and 'Complete GC' closures used
1621 // in serial reference processing. Note these closures are also
1622 // used for serially processing (by the current thread) the
1623 // JNI references during parallel reference processing.
1624 //
1625 // These closures do not need to synchronize with the worker
1626 // threads involved in parallel reference processing as these
1627 // instances are executed serially by the current thread (e.g.
1628 // reference processing is not multi-threaded and is thus
1629 // performed by the current thread instead of a gang worker).
1630 //
1631 // The gang tasks involved in parallel reference processing create
1632 // their own instances of these closures, which do their own
1633 // synchronization among themselves.
1634 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1635 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1636
1637 // We need at least one active thread. If reference processing
1638 // is not multi-threaded we use the current (VMThread) thread,
1639 // otherwise we use the work gang from the G1CollectedHeap and
1640 // we utilize all the worker threads we can.
1641 bool processing_is_mt = rp->processing_is_mt();
1642 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
1643 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
1644
1645 // Parallel processing task executor.
1646 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
1647 g1h->workers(), active_workers);
1648 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1649
1650 // Set the concurrency level. The phase was already set prior to
1651 // executing the remark task.
1652 set_concurrency(active_workers);
1653
1654 // Set the degree of MT processing here. If the discovery was done MT,
1655 // the number of threads involved during discovery could differ from
1656 // the number of active workers. This is OK as long as the discovered
1657 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1658 rp->set_active_mt_degree(active_workers);
1659
1660 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1661
1662 // Process the weak references.
1663 const ReferenceProcessorStats& stats =
1664 rp->process_discovered_references(&g1_is_alive,
1665 &g1_keep_alive,
1666 &g1_drain_mark_stack,
1667 executor,
1668 &pt);
1669 _gc_tracer_cm->report_gc_reference_stats(stats);
1670 pt.print_all_references();
1671
1672 // The do_oop work routines of the keep_alive and drain_marking_stack
1673 // oop closures will set the has_overflown flag if we overflow the
1674 // global marking stack.
1675
1676 assert(has_overflown() || _global_mark_stack.is_empty(),
1677 "Mark stack should be empty (unless it has overflown)");
1678
1679 assert(rp->num_q() == active_workers, "why not");
1680
1681 rp->enqueue_discovered_references(executor, &pt);
1682
1683 rp->verify_no_references_recorded();
1684
1685 pt.print_enqueue_phase();
1686
1687 assert(!rp->discovery_enabled(), "Post condition");
1688 }
1689
1690 if (has_overflown()) {
1691 // We cannot trust g1_is_alive if the marking stack overflowed
1692 return;
1693 }
1694
1695 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1696
1697 // Unload Klasses, String, Symbols, Code Cache, etc.
1698 if (ClassUnloadingWithConcurrentMark) {
1699 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1700 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1701 g1h->complete_cleaning(&g1_is_alive, purged_classes);
1702 } else {
1703 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1704 // No need to clean string table and symbol table as they are treated as strong roots when
1705 // class unloading is disabled.
1706 g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1707
1708 }
1709 }
1710
1711 void G1ConcurrentMark::swapMarkBitMaps() {
1712 G1CMBitMap* temp = _prevMarkBitMap;
1713 _prevMarkBitMap = _nextMarkBitMap;
1714 _nextMarkBitMap = temp;
1715 }
1716
1717 // Closure for marking entries in SATB buffers.
1718 class G1CMSATBBufferClosure : public SATBBufferClosure {
1719 private:
1720 G1CMTask* _task;
1721 G1CollectedHeap* _g1h;
1722
1723 // This is very similar to G1CMTask::deal_with_reference, but with
1724 // more relaxed requirements for the argument, so this must be more
1725 // circumspect about treating the argument as an object.
1726 void do_entry(void* entry) const {
1727 _task->increment_refs_reached();
1728 oop const obj = static_cast<oop>(entry);
1729 _task->make_reference_grey(obj);
1730 }
1731
1732 public:
1733 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1734 : _task(task), _g1h(g1h) { }
1735
1736 virtual void do_buffer(void** buffer, size_t size) {
1737 for (size_t i = 0; i < size; ++i) {
1738 do_entry(buffer[i]);
1739 }
1740 }
1741 };
1742
1743 class G1RemarkThreadsClosure : public ThreadClosure {
1744 G1CMSATBBufferClosure _cm_satb_cl;
1745 G1CMOopClosure _cm_cl;
1746 MarkingCodeBlobClosure _code_cl;
1747 int _thread_parity;
1748
1749 public:
1750 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1751 _cm_satb_cl(task, g1h),
1752 _cm_cl(g1h, g1h->concurrent_mark(), task),
1753 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1754 _thread_parity(Threads::thread_claim_parity()) {}
1755
1756 void do_thread(Thread* thread) {
1757 if (thread->is_Java_thread()) {
1758 if (thread->claim_oops_do(true, _thread_parity)) {
1759 JavaThread* jt = (JavaThread*)thread;
1760
1761 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1762 // however, the oops reachable from nmethods have very complex lifecycles:
1763 // * Alive if on the stack of an executing method
1764 // * Weakly reachable otherwise
1765 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1766 // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1767 jt->nmethods_do(&_code_cl); 1768 1769 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl); 1770 } 1771 } else if (thread->is_VM_thread()) { 1772 if (thread->claim_oops_do(true, _thread_parity)) { 1773 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl); 1774 } 1775 } 1776 } 1777 }; 1778 1779 class G1CMRemarkTask: public AbstractGangTask { 1780 private: 1781 G1ConcurrentMark* _cm; 1782 public: 1783 void work(uint worker_id) { 1784 // Since all available tasks are actually started, we should 1785 // only proceed if we're supposed to be active. 1786 if (worker_id < _cm->active_tasks()) { 1787 G1CMTask* task = _cm->task(worker_id); 1788 task->record_start_time(); 1789 { 1790 ResourceMark rm; 1791 HandleMark hm; 1792 1793 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task); 1794 Threads::threads_do(&threads_f); 1795 } 1796 1797 do { 1798 task->do_marking_step(1000000000.0 /* something very large */, 1799 true /* do_termination */, 1800 false /* is_serial */); 1801 } while (task->has_aborted() && !_cm->has_overflown()); 1802 // If we overflow, then we do not want to restart. We instead 1803 // want to abort remark and do concurrent marking again. 1804 task->record_end_time(); 1805 } 1806 } 1807 1808 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) : 1809 AbstractGangTask("Par Remark"), _cm(cm) { 1810 _cm->terminator()->reset_for_reuse(active_workers); 1811 } 1812 }; 1813 1814 void G1ConcurrentMark::checkpointRootsFinalWork() { 1815 ResourceMark rm; 1816 HandleMark hm; 1817 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1818 1819 GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm); 1820 1821 g1h->ensure_parsability(false); 1822 1823 // this is remark, so we'll use up all active threads 1824 uint active_workers = g1h->workers()->active_workers(); 1825 set_concurrency_and_phase(active_workers, false /* concurrent */); 1826 // Leave _parallel_marking_threads at it's 1827 // value originally calculated in the G1ConcurrentMark 1828 // constructor and pass values of the active workers 1829 // through the gang in the task. 1830 1831 { 1832 StrongRootsScope srs(active_workers); 1833 1834 G1CMRemarkTask remarkTask(this, active_workers); 1835 // We will start all available threads, even if we decide that the 1836 // active_workers will be fewer. The extra ones will just bail out 1837 // immediately. 1838 g1h->workers()->run_task(&remarkTask); 1839 } 1840 1841 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1842 guarantee(has_overflown() || 1843 satb_mq_set.completed_buffers_num() == 0, 1844 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT, 1845 BOOL_TO_STR(has_overflown()), 1846 satb_mq_set.completed_buffers_num()); 1847 1848 print_stats(); 1849 } 1850 1851 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 1852 _prevMarkBitMap->clear_range(mr); 1853 } 1854 1855 HeapRegion* 1856 G1ConcurrentMark::claim_region(uint worker_id) { 1857 // "checkpoint" the finger 1858 HeapWord* finger = _finger; 1859 1860 // _heap_end will not change underneath our feet; it only changes at 1861 // yield points. 1862 while (finger < _heap_end) { 1863 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 1864 1865 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 1866 // Make sure that the reads below do not float before loading curr_region. 1867 OrderAccess::loadload(); 1868 // Above heap_region_containing may return NULL as we always scan claim 1869 // until the end of the heap. 
In this case, just jump to the next region. 1870 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 1871 1872 // Is the gap between reading the finger and doing the CAS too long? 1873 HeapWord* res = Atomic::cmpxchg(end, &_finger, finger); 1874 if (res == finger && curr_region != NULL) { 1875 // we succeeded 1876 HeapWord* bottom = curr_region->bottom(); 1877 HeapWord* limit = curr_region->next_top_at_mark_start(); 1878 1879 // notice that _finger == end cannot be guaranteed here since, 1880 // someone else might have moved the finger even further 1881 assert(_finger >= end, "the finger should have moved forward"); 1882 1883 if (limit > bottom) { 1884 return curr_region; 1885 } else { 1886 assert(limit == bottom, 1887 "the region limit should be at bottom"); 1888 // we return NULL and the caller should try calling 1889 // claim_region() again. 1890 return NULL; 1891 } 1892 } else { 1893 assert(_finger > finger, "the finger should have moved forward"); 1894 // read it again 1895 finger = _finger; 1896 } 1897 } 1898 1899 return NULL; 1900 } 1901 1902 #ifndef PRODUCT 1903 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 1904 private: 1905 G1CollectedHeap* _g1h; 1906 const char* _phase; 1907 int _info; 1908 1909 public: 1910 VerifyNoCSetOops(const char* phase, int info = -1) : 1911 _g1h(G1CollectedHeap::heap()), 1912 _phase(phase), 1913 _info(info) 1914 { } 1915 1916 void operator()(G1TaskQueueEntry task_entry) const { 1917 if (task_entry.is_array_slice()) { 1918 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1919 return; 1920 } 1921 guarantee(oopDesc::is_oop(task_entry.obj()), 1922 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1923 p2i(task_entry.obj()), _phase, _info); 1924 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1925 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1926 p2i(task_entry.obj()), _phase, _info); 1927 } 1928 }; 1929 1930 void G1ConcurrentMark::verify_no_cset_oops() { 1931 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1932 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 1933 return; 1934 } 1935 1936 // Verify entries on the global mark stack 1937 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 1938 1939 // Verify entries on the task queues 1940 for (uint i = 0; i < _max_worker_id; ++i) { 1941 G1CMTaskQueue* queue = _task_queues->queue(i); 1942 queue->iterate(VerifyNoCSetOops("Queue", i)); 1943 } 1944 1945 // Verify the global finger 1946 HeapWord* global_finger = finger(); 1947 if (global_finger != NULL && global_finger < _heap_end) { 1948 // Since we always iterate over all regions, we might get a NULL HeapRegion 1949 // here. 1950 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 1951 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 1952 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 1953 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 1954 } 1955 1956 // Verify the task fingers 1957 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 1958 for (uint i = 0; i < parallel_marking_threads(); ++i) { 1959 G1CMTask* task = _tasks[i]; 1960 HeapWord* task_finger = task->finger(); 1961 if (task_finger != NULL && task_finger < _heap_end) { 1962 // See above note on the global finger verification. 
1963 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 1964 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 1965 !task_hr->in_collection_set(), 1966 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 1967 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 1968 } 1969 } 1970 } 1971 #endif // PRODUCT 1972 void G1ConcurrentMark::create_live_data() { 1973 _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap); 1974 } 1975 1976 void G1ConcurrentMark::finalize_live_data() { 1977 _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap); 1978 } 1979 1980 void G1ConcurrentMark::verify_live_data() { 1981 _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap); 1982 } 1983 1984 void G1ConcurrentMark::clear_live_data(WorkGang* workers) { 1985 _g1h->g1_rem_set()->clear_card_live_data(workers); 1986 } 1987 1988 #ifdef ASSERT 1989 void G1ConcurrentMark::verify_live_data_clear() { 1990 _g1h->g1_rem_set()->verify_card_live_data_is_clear(); 1991 } 1992 #endif 1993 1994 void G1ConcurrentMark::print_stats() { 1995 if (!log_is_enabled(Debug, gc, stats)) { 1996 return; 1997 } 1998 log_debug(gc, stats)("---------------------------------------------------------------------"); 1999 for (size_t i = 0; i < _active_tasks; ++i) { 2000 _tasks[i]->print_stats(); 2001 log_debug(gc, stats)("---------------------------------------------------------------------"); 2002 } 2003 } 2004 2005 void G1ConcurrentMark::abort() { 2006 if (!cmThread()->during_cycle() || _has_aborted) { 2007 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2008 return; 2009 } 2010 2011 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2012 // concurrent bitmap clearing. 2013 { 2014 GCTraceTime(Debug, gc)("Clear Next Bitmap"); 2015 clear_bitmap(_nextMarkBitMap, _g1h->workers(), false); 2016 } 2017 // Note we cannot clear the previous marking bitmap here 2018 // since VerifyDuringGC verifies the objects marked during 2019 // a full GC against the previous bitmap. 2020 2021 { 2022 GCTraceTime(Debug, gc)("Clear Live Data"); 2023 clear_live_data(_g1h->workers()); 2024 } 2025 DEBUG_ONLY({ 2026 GCTraceTime(Debug, gc)("Verify Live Data Clear"); 2027 verify_live_data_clear(); 2028 }) 2029 // Empty mark stack 2030 reset_marking_state(); 2031 for (uint i = 0; i < _max_worker_id; ++i) { 2032 _tasks[i]->clear_region_fields(); 2033 } 2034 _first_overflow_barrier_sync.abort(); 2035 _second_overflow_barrier_sync.abort(); 2036 _has_aborted = true; 2037 2038 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2039 satb_mq_set.abandon_partial_marking(); 2040 // This can be called either during or outside marking, we'll read 2041 // the expected_active value from the SATB queue set. 2042 satb_mq_set.set_active_all_threads( 2043 false, /* new active value */ 2044 satb_mq_set.is_active() /* expected_active */); 2045 } 2046 2047 static void print_ms_time_info(const char* prefix, const char* name, 2048 NumberSeq& ns) { 2049 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2050 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2051 if (ns.num() > 0) { 2052 log_trace(gc, marking)("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 2053 prefix, ns.sd(), ns.maximum()); 2054 } 2055 } 2056 2057 void G1ConcurrentMark::print_summary_info() { 2058 Log(gc, marking) log; 2059 if (!log.is_trace()) { 2060 return; 2061 } 2062 2063 log.trace(" Concurrent marking:"); 2064 print_ms_time_info(" ", "init marks", _init_times); 2065 print_ms_time_info(" ", "remarks", _remark_times); 2066 { 2067 print_ms_time_info(" ", "final marks", _remark_mark_times); 2068 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2069 2070 } 2071 print_ms_time_info(" ", "cleanups", _cleanup_times); 2072 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2073 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2074 if (G1ScrubRemSets) { 2075 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2076 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2077 } 2078 log.trace(" Total stop_world time = %8.2f s.", 2079 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2080 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2081 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2082 } 2083 2084 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2085 _parallel_workers->print_worker_threads_on(st); 2086 } 2087 2088 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2089 _parallel_workers->threads_do(tc); 2090 } 2091 2092 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2093 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2094 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2095 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2096 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2097 } 2098 2099 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2100 ReferenceProcessor* result = g1h->ref_processor_cm(); 2101 assert(result != NULL, "CM reference processor should not be NULL"); 2102 return result; 2103 } 2104 2105 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2106 G1ConcurrentMark* cm, 2107 G1CMTask* task) 2108 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2109 _g1h(g1h), _cm(cm), _task(task) 2110 { } 2111 2112 void G1CMTask::setup_for_region(HeapRegion* hr) { 2113 assert(hr != NULL, 2114 "claim_region() should have filtered out NULL regions"); 2115 _curr_region = hr; 2116 _finger = hr->bottom(); 2117 update_region_limit(); 2118 } 2119 2120 void G1CMTask::update_region_limit() { 2121 HeapRegion* hr = _curr_region; 2122 HeapWord* bottom = hr->bottom(); 2123 HeapWord* limit = hr->next_top_at_mark_start(); 2124 2125 if (limit == bottom) { 2126 // The region was collected underneath our feet. 2127 // We set the finger to bottom to ensure that the bitmap 2128 // iteration that will follow this will not do anything. 2129 // (this is not a condition that holds when we set the region up, 2130 // as the region is not supposed to be empty in the first place) 2131 _finger = bottom; 2132 } else if (limit >= _region_limit) { 2133 assert(limit >= _finger, "peace of mind"); 2134 } else { 2135 assert(limit < _region_limit, "only way to get here"); 2136 // This can happen under some pretty unusual circumstances. An 2137 // evacuation pause empties the region underneath our feet (NTAMS 2138 // at bottom). 
We then do some allocation in the region (NTAMS 2139 // stays at bottom), followed by the region being used as a GC 2140 // alloc region (NTAMS will move to top() and the objects 2141 // originally below it will be grayed). All objects now marked in 2142 // the region are explicitly grayed, if below the global finger, 2143 // and we do not need in fact to scan anything else. So, we simply 2144 // set _finger to be limit to ensure that the bitmap iteration 2145 // doesn't do anything. 2146 _finger = limit; 2147 } 2148 2149 _region_limit = limit; 2150 } 2151 2152 void G1CMTask::giveup_current_region() { 2153 assert(_curr_region != NULL, "invariant"); 2154 clear_region_fields(); 2155 } 2156 2157 void G1CMTask::clear_region_fields() { 2158 // Values for these three fields that indicate that we're not 2159 // holding on to a region. 2160 _curr_region = NULL; 2161 _finger = NULL; 2162 _region_limit = NULL; 2163 } 2164 2165 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 2166 if (cm_oop_closure == NULL) { 2167 assert(_cm_oop_closure != NULL, "invariant"); 2168 } else { 2169 assert(_cm_oop_closure == NULL, "invariant"); 2170 } 2171 _cm_oop_closure = cm_oop_closure; 2172 } 2173 2174 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) { 2175 guarantee(nextMarkBitMap != NULL, "invariant"); 2176 _nextMarkBitMap = nextMarkBitMap; 2177 clear_region_fields(); 2178 2179 _calls = 0; 2180 _elapsed_time_ms = 0.0; 2181 _termination_time_ms = 0.0; 2182 _termination_start_time_ms = 0.0; 2183 } 2184 2185 bool G1CMTask::should_exit_termination() { 2186 regular_clock_call(); 2187 // This is called when we are in the termination protocol. We should 2188 // quit if, for some reason, this task wants to abort or the global 2189 // stack is not empty (this means that we can get work from it). 2190 return !_cm->mark_stack_empty() || has_aborted(); 2191 } 2192 2193 void G1CMTask::reached_limit() { 2194 assert(_words_scanned >= _words_scanned_limit || 2195 _refs_reached >= _refs_reached_limit , 2196 "shouldn't have been called otherwise"); 2197 regular_clock_call(); 2198 } 2199 2200 void G1CMTask::regular_clock_call() { 2201 if (has_aborted()) return; 2202 2203 // First, we need to recalculate the words scanned and refs reached 2204 // limits for the next clock call. 2205 recalculate_limits(); 2206 2207 // During the regular clock call we do the following 2208 2209 // (1) If an overflow has been flagged, then we abort. 2210 if (_cm->has_overflown()) { 2211 set_has_aborted(); 2212 return; 2213 } 2214 2215 // If we are not concurrent (i.e. we're doing remark) we don't need 2216 // to check anything else. The other steps are only needed during 2217 // the concurrent marking phase. 2218 if (!concurrent()) return; 2219 2220 // (2) If marking has been aborted for Full GC, then we also abort. 2221 if (_cm->has_aborted()) { 2222 set_has_aborted(); 2223 return; 2224 } 2225 2226 double curr_time_ms = os::elapsedVTime() * 1000.0; 2227 2228 // (4) We check whether we should yield. If we have to, then we abort. 2229 if (SuspendibleThreadSet::should_yield()) { 2230 // We should yield. To do this we abort the task. The caller is 2231 // responsible for yielding. 2232 set_has_aborted(); 2233 return; 2234 } 2235 2236 // (5) We check whether we've reached our time quota. If we have, 2237 // then we abort. 
2238 double elapsed_time_ms = curr_time_ms - _start_time_ms;
2239 if (elapsed_time_ms > _time_target_ms) {
2240 set_has_aborted();
2241 _has_timed_out = true;
2242 return;
2243 }
2244
2245 // (6) Finally, we check whether there are enough completed SATB
2246 // buffers available for processing. If there are, we abort.
2247 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2248 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2249 // we do need to process SATB buffers, we'll abort and restart
2250 // the marking task to do so
2251 set_has_aborted();
2252 return;
2253 }
2254 }
2255
2256 void G1CMTask::recalculate_limits() {
2257 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2258 _words_scanned_limit = _real_words_scanned_limit;
2259
2260 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2261 _refs_reached_limit = _real_refs_reached_limit;
2262 }
2263
2264 void G1CMTask::decrease_limits() {
2265 // This is called when we believe that we're going to do an infrequent
2266 // operation which will increase the per byte scanned cost (i.e. move
2267 // entries to/from the global stack). It basically tries to decrease the
2268 // scanning limit so that the clock is called earlier.
2269
2270 _words_scanned_limit = _real_words_scanned_limit -
2271 3 * words_scanned_period / 4;
2272 _refs_reached_limit = _real_refs_reached_limit -
2273 3 * refs_reached_period / 4;
2274 }
2275
2276 void G1CMTask::move_entries_to_global_stack() {
2277 // Local array where we'll store the entries that will be popped
2278 // from the local queue.
2279 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2280
2281 size_t n = 0;
2282 G1TaskQueueEntry task_entry;
2283 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2284 buffer[n] = task_entry;
2285 ++n;
2286 }
2287 if (n < G1CMMarkStack::EntriesPerChunk) {
2288 buffer[n] = G1TaskQueueEntry();
2289 }
2290
2291 if (n > 0) {
2292 if (!_cm->mark_stack_push(buffer)) {
2293 set_has_aborted();
2294 }
2295 }
2296
2297 // This operation was quite expensive, so decrease the limits.
2298 decrease_limits();
2299 }
2300
2301 bool G1CMTask::get_entries_from_global_stack() {
2302 // Local array where we'll store the entries that will be popped
2303 // from the global stack.
2304 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2305
2306 if (!_cm->mark_stack_pop(buffer)) {
2307 return false;
2308 }
2309
2310 // We did actually pop at least one entry.
2311 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2312 G1TaskQueueEntry task_entry = buffer[i];
2313 if (task_entry.is_null()) {
2314 break;
2315 }
2316 assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2317 bool success = _task_queue->push(task_entry);
2318 // We only call this when the local queue is empty or under a
2319 // given target limit. So, we do not expect this push to fail.
2320 assert(success, "invariant");
2321 }
2322
2323 // This operation was quite expensive, so decrease the limits
2324 decrease_limits();
2325 return true;
2326 }
2327
2328 void G1CMTask::drain_local_queue(bool partially) {
2329 if (has_aborted()) {
2330 return;
2331 }
2332
2333 // Decide what the target size is, depending on whether we're going to
2334 // drain it partially (so that other tasks can steal if they run out
2335 // of things to do) or totally (at the very end).
2336 size_t target_size;
2337 if (partially) {
2338 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2339 } else {
2340 target_size = 0;
2341 }
2342
2343 if (_task_queue->size() > target_size) {
2344 G1TaskQueueEntry entry;
2345 bool ret = _task_queue->pop_local(entry);
2346 while (ret) {
2347 scan_task_entry(entry);
2348 if (_task_queue->size() <= target_size || has_aborted()) {
2349 ret = false;
2350 } else {
2351 ret = _task_queue->pop_local(entry);
2352 }
2353 }
2354 }
2355 }
2356
2357 void G1CMTask::drain_global_stack(bool partially) {
2358 if (has_aborted()) return;
2359
2360 // We have a policy to drain the local queue before we attempt to
2361 // drain the global stack.
2362 assert(partially || _task_queue->size() == 0, "invariant");
2363
2364 // Decide what the target size is, depending on whether we're going to
2365 // drain it partially (so that other tasks can steal if they run out
2366 // of things to do) or totally (at the very end).
2367 // Notice that when draining the global mark stack partially, due to the raciness
2368 // of the mark stack size update we might in fact drop below the target. But,
2369 // this is not a problem.
2370 // In case of total draining, we simply process until the global mark stack is
2371 // totally empty, disregarding the size counter.
2372 if (partially) {
2373 size_t const target_size = _cm->partial_mark_stack_size_target();
2374 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2375 if (get_entries_from_global_stack()) {
2376 drain_local_queue(partially);
2377 }
2378 }
2379 } else {
2380 while (!has_aborted() && get_entries_from_global_stack()) {
2381 drain_local_queue(partially);
2382 }
2383 }
2384 }
2385
2386 // The SATB queue has several assumptions about whether to call the par or
2387 // non-par versions of the methods. This is why some of the code is
2388 // replicated. We should really get rid of the single-threaded version
2389 // of the code to simplify things.
2390 void G1CMTask::drain_satb_buffers() {
2391 if (has_aborted()) return;
2392
2393 // We set this so that the regular clock knows that we're in the
2394 // middle of draining buffers and doesn't set the abort flag when it
2395 // notices that SATB buffers are available for draining. It'd be
2396 // very counterproductive if it did that. :-)
2397 _draining_satb_buffers = true;
2398
2399 G1CMSATBBufferClosure satb_cl(this, _g1h);
2400 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2401
2402 // This keeps claiming and applying the closure to completed buffers
2403 // until we run out of buffers or we need to abort.
2404 while (!has_aborted() && 2405 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2406 regular_clock_call(); 2407 } 2408 2409 _draining_satb_buffers = false; 2410 2411 assert(has_aborted() || 2412 concurrent() || 2413 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2414 2415 // again, this was a potentially expensive operation, decrease the 2416 // limits to get the regular clock call early 2417 decrease_limits(); 2418 } 2419 2420 void G1CMTask::print_stats() { 2421 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 2422 _worker_id, _calls); 2423 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2424 _elapsed_time_ms, _termination_time_ms); 2425 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 2426 _step_times_ms.num(), _step_times_ms.avg(), 2427 _step_times_ms.sd()); 2428 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 2429 _step_times_ms.maximum(), _step_times_ms.sum()); 2430 } 2431 2432 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2433 return _task_queues->steal(worker_id, hash_seed, task_entry); 2434 } 2435 2436 /***************************************************************************** 2437 2438 The do_marking_step(time_target_ms, ...) method is the building 2439 block of the parallel marking framework. It can be called in parallel 2440 with other invocations of do_marking_step() on different tasks 2441 (but only one per task, obviously) and concurrently with the 2442 mutator threads, or during remark, hence it eliminates the need 2443 for two versions of the code. When called during remark, it will 2444 pick up from where the task left off during the concurrent marking 2445 phase. Interestingly, tasks are also claimable during evacuation 2446 pauses too, since do_marking_step() ensures that it aborts before 2447 it needs to yield. 2448 2449 The data structures that it uses to do marking work are the 2450 following: 2451 2452 (1) Marking Bitmap. If there are gray objects that appear only 2453 on the bitmap (this happens either when dealing with an overflow 2454 or when the initial marking phase has simply marked the roots 2455 and didn't push them on the stack), then tasks claim heap 2456 regions whose bitmap they then scan to find gray objects. A 2457 global finger indicates where the end of the last claimed region 2458 is. A local finger indicates how far into the region a task has 2459 scanned. The two fingers are used to determine how to gray an 2460 object (i.e. whether simply marking it is OK, as it will be 2461 visited by a task in the future, or whether it needs to be also 2462 pushed on a stack). 2463 2464 (2) Local Queue. The local queue of the task which is accessed 2465 reasonably efficiently by the task. Other tasks can steal from 2466 it when they run out of work. Throughout the marking phase, a 2467 task attempts to keep its local queue short but not totally 2468 empty, so that entries are available for stealing by other 2469 tasks. Only when there is no more work, a task will totally 2470 drain its local queue. 2471 2472 (3) Global Mark Stack. This handles local queue overflow. During 2473 marking only sets of entries are moved between it and the local 2474 queues, as access to it requires a mutex and more fine-grain 2475 interaction with it which might cause contention. If it 2476 overflows, then the marking phase should restart and iterate 2477 over the bitmap to identify gray objects. 
Throughout the marking
2477
2478 phase, tasks attempt to keep the global mark stack at a small
2479 length but not totally empty, so that entries are available for
2480 popping by other tasks. Only when there is no more work, tasks
2481 will totally drain the global mark stack.
2482
2483 (4) SATB Buffer Queue. This is where completed SATB buffers are
2484 made available. Buffers are regularly removed from this queue
2485 and scanned for roots, so that the queue doesn't get too
2486 long. During remark, all completed buffers are processed, as
2487 well as the filled-in parts of any uncompleted buffers.
2488
2489 The do_marking_step() method tries to abort when the time target
2490 has been reached. There are a few other cases when the
2491 do_marking_step() method also aborts:
2492
2493 (1) When the marking phase has been aborted (after a Full GC).
2494
2495 (2) When a global overflow (on the global stack) has been
2496 triggered. Before the task aborts, it will actually sync up with
2497 the other tasks to ensure that all the marking data structures
2498 (local queues, stacks, fingers etc.) are re-initialized so that
2499 when do_marking_step() completes, the marking phase can
2500 immediately restart.
2501
2502 (3) When enough completed SATB buffers are available. The
2503 do_marking_step() method only tries to drain SATB buffers right
2504 at the beginning. So, if enough buffers are available, the
2505 marking step aborts and the SATB buffers are processed at
2506 the beginning of the next invocation.
2507
2508 (4) To yield. When we have to yield, we abort and yield
2509 right at the end of do_marking_step(). This saves us from a lot
2510 of hassle as, by yielding, we might allow a Full GC. If this
2511 happens, then objects will be compacted underneath our feet, the
2512 heap might shrink, etc. We save checking for this by just
2513 aborting and doing the yield right at the end.
2514
2515 From the above it follows that the do_marking_step() method should
2516 be called in a loop (or, otherwise, regularly) until it completes.
2517
2518 If a marking step completes without its has_aborted() flag being
2519 true, it means it has completed the current marking phase (and
2520 also all other marking tasks have done so and have all synced up).
2521
2522 A method called regular_clock_call() is invoked "regularly" (in
2523 sub-ms intervals) throughout marking. It is this clock method that
2524 checks all the abort conditions which were mentioned above and
2525 decides when the task should abort. A work-based scheme is used to
2526 trigger this clock method: when the number of object words the
2527 marking phase has scanned or the number of references the marking
2528 phase has visited reach a given limit. Additional invocations of
2529 the clock method have been planted in a few other strategic places
2530 too. The initial reason for the clock method was to avoid calling
2531 vtime too regularly, as it is quite expensive. So, once it was in
2532 place, it was natural to piggy-back all the other conditions on it
2533 too and not constantly check them throughout the code.
2534
2535 If do_termination is true then do_marking_step will enter its
2536 termination protocol.
2537
2538 The value of is_serial must be true when do_marking_step is being
2539 called serially (i.e. by the VMThread) and do_marking_step should
2540 skip any synchronization in the termination and overflow code.
2541 Examples include the serial remark code and the serial reference
2542 processing closures.
2543
2544 The value of is_serial must be false when do_marking_step is
2545 being called by any of the worker threads in a work gang.
2546 Examples include the concurrent marking code (CMMarkingTask),
2547 the MT remark code, and the MT reference processing closures.
2548
2549 *****************************************************************************/
2550
2551 void G1CMTask::do_marking_step(double time_target_ms,
2552 bool do_termination,
2553 bool is_serial) {
2554 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2555 assert(concurrent() == _cm->concurrent(), "they should be the same");
2556
2557 G1Policy* g1_policy = _g1h->g1_policy();
2558 assert(_task_queues != NULL, "invariant");
2559 assert(_task_queue != NULL, "invariant");
2560 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
2561
2562 assert(!_claimed,
2563 "only one thread should claim this task at any one time");
2564
2565 // OK, this doesn't safeguard against all possible scenarios, as it is
2566 // possible for two threads to set the _claimed flag at the same
2567 // time. But it is only for debugging purposes anyway and it will
2568 // catch most problems.
2569 _claimed = true;
2570
2571 _start_time_ms = os::elapsedVTime() * 1000.0;
2572
2573 // If do_stealing is true then do_marking_step will attempt to
2574 // steal work from the other G1CMTasks. It only makes sense to
2575 // enable stealing when the termination protocol is enabled
2576 // and do_marking_step() is not being called serially.
2577 bool do_stealing = do_termination && !is_serial;
2578
2579 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2580 _time_target_ms = time_target_ms - diff_prediction_ms;
2581
2582 // set up the variables that are used in the work-based scheme to
2583 // call the regular clock method
2584 _words_scanned = 0;
2585 _refs_reached = 0;
2586 recalculate_limits();
2587
2588 // clear all flags
2589 clear_has_aborted();
2590 _has_timed_out = false;
2591 _draining_satb_buffers = false;
2592
2593 ++_calls;
2594
2595 // Set up the bitmap and oop closures. Anything that uses them is
2596 // eventually called from this method, so it is OK to allocate these
2597 // statically.
2598 G1CMBitMapClosure bitmap_closure(this, _cm);
2599 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
2600 set_cm_oop_closure(&cm_oop_closure);
2601
2602 if (_cm->has_overflown()) {
2603 // This can happen if the mark stack overflows during a GC pause
2604 // and this task, after a yield point, restarts. We have to abort
2605 // as we need to get into the overflow protocol which happens
2606 // right at the end of this task.
2607 set_has_aborted();
2608 }
2609
2610 // First drain any available SATB buffers. After this, we will not
2611 // look at SATB buffers before the next invocation of this method.
2612 // If enough completed SATB buffers are queued up, the regular clock
2613 // will abort this task so that it restarts.
2614 drain_satb_buffers();
2615 // ...then partially drain the local queue and the global stack
2616 drain_local_queue(true);
2617 drain_global_stack(true);
2618
2619 do {
2620 if (!has_aborted() && _curr_region != NULL) {
2621 // This means that we're already holding on to a region.
2622 assert(_finger != NULL, "if region is not NULL, then the finger "
2623 "should not be NULL either");
2624
2625 // We might have restarted this task after an evacuation pause
2626 // which might have evacuated the region we're holding on to
2627 // underneath our feet.
Let's read its limit again to make sure 2628 // that we do not iterate over a region of the heap that 2629 // contains garbage (update_region_limit() will also move 2630 // _finger to the start of the region if it is found empty). 2631 update_region_limit(); 2632 // We will start from _finger not from the start of the region, 2633 // as we might be restarting this task after aborting half-way 2634 // through scanning this region. In this case, _finger points to 2635 // the address where we last found a marked object. If this is a 2636 // fresh region, _finger points to start(). 2637 MemRegion mr = MemRegion(_finger, _region_limit); 2638 2639 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2640 "humongous regions should go around loop once only"); 2641 2642 // Some special cases: 2643 // If the memory region is empty, we can just give up the region. 2644 // If the current region is humongous then we only need to check 2645 // the bitmap for the bit associated with the start of the object, 2646 // scan the object if it's live, and give up the region. 2647 // Otherwise, let's iterate over the bitmap of the part of the region 2648 // that is left. 2649 // If the iteration is successful, give up the region. 2650 if (mr.is_empty()) { 2651 giveup_current_region(); 2652 regular_clock_call(); 2653 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2654 if (_nextMarkBitMap->is_marked(mr.start())) { 2655 // The object is marked - apply the closure 2656 bitmap_closure.do_addr(mr.start()); 2657 } 2658 // Even if this task aborted while scanning the humongous object 2659 // we can (and should) give up the current region. 2660 giveup_current_region(); 2661 regular_clock_call(); 2662 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 2663 giveup_current_region(); 2664 regular_clock_call(); 2665 } else { 2666 assert(has_aborted(), "currently the only way to do so"); 2667 // The only way to abort the bitmap iteration is to return 2668 // false from the do_bit() method. However, inside the 2669 // do_bit() method we move the _finger to point to the 2670 // object currently being looked at. So, if we bail out, we 2671 // have definitely set _finger to something non-null. 2672 assert(_finger != NULL, "invariant"); 2673 2674 // Region iteration was actually aborted. So now _finger 2675 // points to the address of the object we last scanned. If we 2676 // leave it there, when we restart this task, we will rescan 2677 // the object. It is easy to avoid this. We move the finger by 2678 // enough to point to the next possible object header. 2679 assert(_finger < _region_limit, "invariant"); 2680 HeapWord* const new_finger = _finger + ((oop)_finger)->size(); 2681 // Check if bitmap iteration was aborted while scanning the last object 2682 if (new_finger >= _region_limit) { 2683 giveup_current_region(); 2684 } else { 2685 move_finger_to(new_finger); 2686 } 2687 } 2688 } 2689 // At this point we have either completed iterating over the 2690 // region we were holding on to, or we have aborted. 2691 2692 // We then partially drain the local queue and the global stack. 2693 // (Do we really need this?) 2694 drain_local_queue(true); 2695 drain_global_stack(true); 2696 2697 // Read the note on the claim_region() method on why it might 2698 // return NULL with potentially more regions available for 2699 // claiming and why we have to check out_of_regions() to determine 2700 // whether we're done or not. 
2701 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2702 // We are going to try to claim a new region. We should have 2703 // given up on the previous one. 2704 // Separated the asserts so that we know which one fires. 2705 assert(_curr_region == NULL, "invariant"); 2706 assert(_finger == NULL, "invariant"); 2707 assert(_region_limit == NULL, "invariant"); 2708 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2709 if (claimed_region != NULL) { 2710 // Yes, we managed to claim one 2711 setup_for_region(claimed_region); 2712 assert(_curr_region == claimed_region, "invariant"); 2713 } 2714 // It is important to call the regular clock here. It might take 2715 // a while to claim a region if, for example, we hit a large 2716 // block of empty regions. So we need to call the regular clock 2717 // method once round the loop to make sure it's called 2718 // frequently enough. 2719 regular_clock_call(); 2720 } 2721 2722 if (!has_aborted() && _curr_region == NULL) { 2723 assert(_cm->out_of_regions(), 2724 "at this point we should be out of regions"); 2725 } 2726 } while ( _curr_region != NULL && !has_aborted()); 2727 2728 if (!has_aborted()) { 2729 // We cannot check whether the global stack is empty, since other 2730 // tasks might be pushing objects to it concurrently. 2731 assert(_cm->out_of_regions(), 2732 "at this point we should be out of regions"); 2733 // Try to reduce the number of available SATB buffers so that 2734 // remark has less work to do. 2735 drain_satb_buffers(); 2736 } 2737 2738 // Since we've done everything else, we can now totally drain the 2739 // local queue and global stack. 2740 drain_local_queue(false); 2741 drain_global_stack(false); 2742 2743 // Attempt at work stealing from other task's queues. 2744 if (do_stealing && !has_aborted()) { 2745 // We have not aborted. This means that we have finished all that 2746 // we could. Let's try to do some stealing... 2747 2748 // We cannot check whether the global stack is empty, since other 2749 // tasks might be pushing objects to it concurrently. 2750 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2751 "only way to reach here"); 2752 while (!has_aborted()) { 2753 G1TaskQueueEntry entry; 2754 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2755 scan_task_entry(entry); 2756 2757 // And since we're towards the end, let's totally drain the 2758 // local queue and global stack. 2759 drain_local_queue(false); 2760 drain_global_stack(false); 2761 } else { 2762 break; 2763 } 2764 } 2765 } 2766 2767 // We still haven't aborted. Now, let's try to get into the 2768 // termination protocol. 2769 if (do_termination && !has_aborted()) { 2770 // We cannot check whether the global stack is empty, since other 2771 // tasks might be concurrently pushing objects on it. 2772 // Separated the asserts so that we know which one fires. 2773 assert(_cm->out_of_regions(), "only way to reach here"); 2774 assert(_task_queue->size() == 0, "only way to reach here"); 2775 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2776 2777 // The G1CMTask class also extends the TerminatorTerminator class, 2778 // hence its should_exit_termination() method will also decide 2779 // whether to exit the termination protocol or not. 
2780 bool finished = (is_serial || 2781 _cm->terminator()->offer_termination(this)); 2782 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2783 _termination_time_ms += 2784 termination_end_time_ms - _termination_start_time_ms; 2785 2786 if (finished) { 2787 // We're all done. 2788 2789 if (_worker_id == 0) { 2790 // let's allow task 0 to do this 2791 if (concurrent()) { 2792 assert(_cm->concurrent_marking_in_progress(), "invariant"); 2793 // we need to set this to false before the next 2794 // safepoint. This way we ensure that the marking phase 2795 // doesn't observe any more heap expansions. 2796 _cm->clear_concurrent_marking_in_progress(); 2797 } 2798 } 2799 2800 // We can now guarantee that the global stack is empty, since 2801 // all other tasks have finished. We separated the guarantees so 2802 // that, if a condition is false, we can immediately find out 2803 // which one. 2804 guarantee(_cm->out_of_regions(), "only way to reach here"); 2805 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2806 guarantee(_task_queue->size() == 0, "only way to reach here"); 2807 guarantee(!_cm->has_overflown(), "only way to reach here"); 2808 } else { 2809 // Apparently there's more work to do. Let's abort this task. It 2810 // will restart it and we can hopefully find more things to do. 2811 set_has_aborted(); 2812 } 2813 } 2814 2815 // Mainly for debugging purposes to make sure that a pointer to the 2816 // closure which was statically allocated in this frame doesn't 2817 // escape it by accident. 2818 set_cm_oop_closure(NULL); 2819 double end_time_ms = os::elapsedVTime() * 1000.0; 2820 double elapsed_time_ms = end_time_ms - _start_time_ms; 2821 // Update the step history. 2822 _step_times_ms.add(elapsed_time_ms); 2823 2824 if (has_aborted()) { 2825 // The task was aborted for some reason. 2826 if (_has_timed_out) { 2827 double diff_ms = elapsed_time_ms - _time_target_ms; 2828 // Keep statistics of how well we did with respect to hitting 2829 // our target only if we actually timed out (if we aborted for 2830 // other reasons, then the results might get skewed). 2831 _marking_step_diffs_ms.add(diff_ms); 2832 } 2833 2834 if (_cm->has_overflown()) { 2835 // This is the interesting one. We aborted because a global 2836 // overflow was raised. This means we have to restart the 2837 // marking phase and start iterating over regions. However, in 2838 // order to do this we have to make sure that all tasks stop 2839 // what they are doing and re-initialize in a safe manner. We 2840 // will achieve this with the use of two barrier sync points. 2841 2842 if (!is_serial) { 2843 // We only need to enter the sync barrier if being called 2844 // from a parallel context 2845 _cm->enter_first_sync_barrier(_worker_id); 2846 2847 // When we exit this sync barrier we know that all tasks have 2848 // stopped doing marking work. So, it's now safe to 2849 // re-initialize our data structures. At the end of this method, 2850 // task 0 will clear the global data structures. 2851 } 2852 2853 // We clear the local state of this task... 2854 clear_region_fields(); 2855 2856 if (!is_serial) { 2857 // ...and enter the second barrier. 2858 _cm->enter_second_sync_barrier(_worker_id); 2859 } 2860 // At this point, if we're during the concurrent phase of 2861 // marking, everything has been re-initialized and we're 2862 // ready to restart. 
2863 } 2864 } 2865 2866 _claimed = false; 2867 } 2868 2869 G1CMTask::G1CMTask(uint worker_id, 2870 G1ConcurrentMark* cm, 2871 G1CMTaskQueue* task_queue, 2872 G1CMTaskQueueSet* task_queues) 2873 : _g1h(G1CollectedHeap::heap()), 2874 _worker_id(worker_id), _cm(cm), 2875 _objArray_processor(this), 2876 _claimed(false), 2877 _nextMarkBitMap(NULL), _hash_seed(17), 2878 _task_queue(task_queue), 2879 _task_queues(task_queues), 2880 _cm_oop_closure(NULL) { 2881 guarantee(task_queue != NULL, "invariant"); 2882 guarantee(task_queues != NULL, "invariant"); 2883 2884 _marking_step_diffs_ms.add(0.5); 2885 } 2886 2887 // These are formatting macros that are used below to ensure 2888 // consistent formatting. The *_H_* versions are used to format the 2889 // header for a particular value and they should be kept consistent 2890 // with the corresponding macro. Also note that most of the macros add 2891 // the necessary white space (as a prefix) which makes them a bit 2892 // easier to compose. 2893 2894 // All the output lines are prefixed with this string to be able to 2895 // identify them easily in a large log file. 2896 #define G1PPRL_LINE_PREFIX "###" 2897 2898 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 2899 #ifdef _LP64 2900 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 2901 #else // _LP64 2902 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 2903 #endif // _LP64 2904 2905 // For per-region info 2906 #define G1PPRL_TYPE_FORMAT " %-4s" 2907 #define G1PPRL_TYPE_H_FORMAT " %4s" 2908 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 2909 #define G1PPRL_BYTE_H_FORMAT " %9s" 2910 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 2911 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 2912 2913 // For summary info 2914 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 2915 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 2916 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 2917 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 2918 2919 G1PrintRegionLivenessInfoClosure:: 2920 G1PrintRegionLivenessInfoClosure(const char* phase_name) 2921 : _total_used_bytes(0), _total_capacity_bytes(0), 2922 _total_prev_live_bytes(0), _total_next_live_bytes(0), 2923 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 2924 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2925 MemRegion g1_reserved = g1h->g1_reserved(); 2926 double now = os::elapsedTime(); 2927 2928 // Print the header of the output. 
2929 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 2930 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 2931 G1PPRL_SUM_ADDR_FORMAT("reserved") 2932 G1PPRL_SUM_BYTE_FORMAT("region-size"), 2933 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 2934 HeapRegion::GrainBytes); 2935 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2936 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2937 G1PPRL_TYPE_H_FORMAT 2938 G1PPRL_ADDR_BASE_H_FORMAT 2939 G1PPRL_BYTE_H_FORMAT 2940 G1PPRL_BYTE_H_FORMAT 2941 G1PPRL_BYTE_H_FORMAT 2942 G1PPRL_DOUBLE_H_FORMAT 2943 G1PPRL_BYTE_H_FORMAT 2944 G1PPRL_BYTE_H_FORMAT, 2945 "type", "address-range", 2946 "used", "prev-live", "next-live", "gc-eff", 2947 "remset", "code-roots"); 2948 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2949 G1PPRL_TYPE_H_FORMAT 2950 G1PPRL_ADDR_BASE_H_FORMAT 2951 G1PPRL_BYTE_H_FORMAT 2952 G1PPRL_BYTE_H_FORMAT 2953 G1PPRL_BYTE_H_FORMAT 2954 G1PPRL_DOUBLE_H_FORMAT 2955 G1PPRL_BYTE_H_FORMAT 2956 G1PPRL_BYTE_H_FORMAT, 2957 "", "", 2958 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 2959 "(bytes)", "(bytes)"); 2960 } 2961 2962 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 2963 const char* type = r->get_type_str(); 2964 HeapWord* bottom = r->bottom(); 2965 HeapWord* end = r->end(); 2966 size_t capacity_bytes = r->capacity(); 2967 size_t used_bytes = r->used(); 2968 size_t prev_live_bytes = r->live_bytes(); 2969 size_t next_live_bytes = r->next_live_bytes(); 2970 double gc_eff = r->gc_efficiency(); 2971 size_t remset_bytes = r->rem_set()->mem_size(); 2972 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 2973 2974 _total_used_bytes += used_bytes; 2975 _total_capacity_bytes += capacity_bytes; 2976 _total_prev_live_bytes += prev_live_bytes; 2977 _total_next_live_bytes += next_live_bytes; 2978 _total_remset_bytes += remset_bytes; 2979 _total_strong_code_roots_bytes += strong_code_roots_bytes; 2980 2981 // Print a line for this particular region. 2982 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2983 G1PPRL_TYPE_FORMAT 2984 G1PPRL_ADDR_BASE_FORMAT 2985 G1PPRL_BYTE_FORMAT 2986 G1PPRL_BYTE_FORMAT 2987 G1PPRL_BYTE_FORMAT 2988 G1PPRL_DOUBLE_FORMAT 2989 G1PPRL_BYTE_FORMAT 2990 G1PPRL_BYTE_FORMAT, 2991 type, p2i(bottom), p2i(end), 2992 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 2993 remset_bytes, strong_code_roots_bytes); 2994 2995 return false; 2996 } 2997 2998 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 2999 // add static memory usages to remembered set sizes 3000 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3001 // Print the footer of the output. 3002 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3003 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3004 " SUMMARY" 3005 G1PPRL_SUM_MB_FORMAT("capacity") 3006 G1PPRL_SUM_MB_PERC_FORMAT("used") 3007 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3008 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3009 G1PPRL_SUM_MB_FORMAT("remset") 3010 G1PPRL_SUM_MB_FORMAT("code-roots"), 3011 bytes_to_mb(_total_capacity_bytes), 3012 bytes_to_mb(_total_used_bytes), 3013 perc(_total_used_bytes, _total_capacity_bytes), 3014 bytes_to_mb(_total_prev_live_bytes), 3015 perc(_total_prev_live_bytes, _total_capacity_bytes), 3016 bytes_to_mb(_total_next_live_bytes), 3017 perc(_total_next_live_bytes, _total_capacity_bytes), 3018 bytes_to_mb(_total_remset_bytes), 3019 bytes_to_mb(_total_strong_code_roots_bytes)); 3020 }
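// Usage sketch: the per-region liveness table above is written with
// log_trace(gc, liveness), so it is only produced when trace logging is enabled
// for those tags, e.g. (assuming the standard -Xlog unified logging syntax):
//
//   java -XX:+UseG1GC -Xlog:gc+liveness=trace ...
//
// Each row is prefixed with G1PPRL_LINE_PREFIX ("###") so the table can easily
// be grepped out of a large log file.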