1 /*
   2  * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorState.hpp"
  32 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  33 #include "gc/g1/g1HeapVerifier.hpp"
  34 #include "gc/g1/g1OopClosures.inline.hpp"
  35 #include "gc/g1/g1CardLiveData.inline.hpp"
  36 #include "gc/g1/g1Policy.hpp"
  37 #include "gc/g1/g1StringDedup.hpp"
  38 #include "gc/g1/heapRegion.inline.hpp"
  39 #include "gc/g1/heapRegionRemSet.hpp"
  40 #include "gc/g1/heapRegionSet.inline.hpp"
  41 #include "gc/g1/suspendibleThreadSet.hpp"
  42 #include "gc/shared/gcId.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.inline.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "logging/log.hpp"
  52 #include "memory/allocation.hpp"
  53 #include "memory/resourceArea.hpp"
  54 #include "oops/oop.inline.hpp"
  55 #include "runtime/atomic.hpp"
  56 #include "runtime/handles.inline.hpp"
  57 #include "runtime/java.hpp"
  58 #include "runtime/prefetch.inline.hpp"
  59 #include "services/memTracker.hpp"
  60 #include "utilities/align.hpp"
  61 #include "utilities/growableArray.hpp"
  62 
  63 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  64   assert(addr < _cm->finger(), "invariant");
  65   assert(addr >= _task->finger(), "invariant");
  66 
  67   // We move that task's local finger along.
  68   _task->move_finger_to(addr);
  69 
  70   _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  71   // we only partially drain the local queue and global stack
  72   _task->drain_local_queue(true);
  73   _task->drain_global_stack(true);
  74 
  75   // if the has_aborted flag has been raised, we need to bail out of
  76   // the iteration
  77   return !_task->has_aborted();
  78 }
  79 
  80 G1CMMarkStack::G1CMMarkStack() :
  81   _max_chunk_capacity(0),
  82   _base(NULL),
  83   _chunk_capacity(0) {
  84   set_empty();
  85 }
  86 
  87 bool G1CMMarkStack::resize(size_t new_capacity) {
  88   assert(is_empty(), "Only resize when stack is empty.");
  89   assert(new_capacity <= _max_chunk_capacity,
  90          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
  91 
  92   TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);
  93 
  94   if (new_base == NULL) {
  95     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
  96     return false;
  97   }
  98   // Release old mapping.
  99   if (_base != NULL) {
 100     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 101   }
 102 
 103   _base = new_base;
 104   _chunk_capacity = new_capacity;
 105   set_empty();
 106 
 107   return true;
 108 }
 109 
 110 size_t G1CMMarkStack::capacity_alignment() {
 111   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 112 }
 113 
 114 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 115   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 116 
 117   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 118 
 119   _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 120   size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 121 
 122   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 123             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 124             _max_chunk_capacity,
 125             initial_chunk_capacity);
 126 
 127   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 128                 initial_chunk_capacity, _max_chunk_capacity);
 129 
 130   return resize(initial_chunk_capacity);
 131 }
 132 
 133 void G1CMMarkStack::expand() {
 134   if (_chunk_capacity == _max_chunk_capacity) {
 135     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 136     return;
 137   }
 138   size_t old_capacity = _chunk_capacity;
 139   // Double capacity if possible
 140   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 141 
 142   if (resize(new_capacity)) {
 143     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 144                   old_capacity, new_capacity);
 145   } else {
 146     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 147                     old_capacity, new_capacity);
 148   }
 149 }
 150 
 151 G1CMMarkStack::~G1CMMarkStack() {
 152   if (_base != NULL) {
 153     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 154   }
 155 }
 156 
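// The helpers below maintain singly-linked lists of chunks. The raw
// add/remove helpers do no locking themselves; the chunk-list and free-list
// wrappers serialize access using MarkStackChunkList_lock and
// MarkStackFreeList_lock respectively.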
 157 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 158   elem->next = *list;
 159   *list = elem;
 160 }
 161 
 162 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 163   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 164   add_chunk_to_list(&_chunk_list, elem);
 165   _chunks_in_chunk_list++;
 166 }
 167 
 168 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 169   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 170   add_chunk_to_list(&_free_list, elem);
 171 }
 172 
 173 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 174   TaskQueueEntryChunk* result = *list;
 175   if (result != NULL) {
 176     *list = (*list)->next;
 177   }
 178   return result;
 179 }
 180 
 181 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 182   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 183   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 184   if (result != NULL) {
 185     _chunks_in_chunk_list--;
 186   }
 187   return result;
 188 }
 189 
 190 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 191   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 192   return remove_chunk_from_list(&_free_list);
 193 }
 194 
 195 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
 196   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 197   // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 198   // wraparound of _hwm.
 199   if (_hwm >= _chunk_capacity) {
 200     return NULL;
 201   }
 202 
 203   size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
 204   if (cur_idx >= _chunk_capacity) {
 205     return NULL;
 206   }
 207 
 208   TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
 209   result->next = NULL;
 210   return result;
 211 }
 212 
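// Push the EntriesPerChunk entries in ptr_arr onto the global mark stack as a
// single chunk, reusing a chunk from the free list if possible and otherwise
// allocating one from the backing memory. Returns false if no chunk could be
// obtained, i.e. the mark stack has overflowed.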
 213 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 214   // Get a new chunk.
 215   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 216 
 217   if (new_chunk == NULL) {
 218     // Did not get a chunk from the free list. Allocate from backing memory.
 219     new_chunk = allocate_new_chunk();
 220 
 221     if (new_chunk == NULL) {
 222       return false;
 223     }
 224   }
 225 
 226   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 227 
 228   add_chunk_to_chunk_list(new_chunk);
 229 
 230   return true;
 231 }
 232 
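// Pop one chunk of EntriesPerChunk entries from the global mark stack into
// ptr_arr and recycle the chunk onto the free list. Returns false if the
// mark stack is empty.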
 233 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 234   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 235 
 236   if (cur == NULL) {
 237     return false;
 238   }
 239 
 240   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 241 
 242   add_chunk_to_free_list(cur);
 243   return true;
 244 }
 245 
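// Reset the mark stack to the empty state. The backing memory is retained.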
 246 void G1CMMarkStack::set_empty() {
 247   _chunks_in_chunk_list = 0;
 248   _hwm = 0;
 249   _chunk_list = NULL;
 250   _free_list = NULL;
 251 }
 252 
 253 G1CMRootRegions::G1CMRootRegions() :
 254   _cm(NULL), _scan_in_progress(false),
 255   _should_abort(false), _claimed_survivor_index(0) { }
 256 
 257 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 258   _survivors = survivors;
 259   _cm = cm;
 260 }
 261 
 262 void G1CMRootRegions::prepare_for_scan() {
 263   assert(!scan_in_progress(), "pre-condition");
 264 
 265   // Currently, only survivors can be root regions.
 266   _claimed_survivor_index = 0;
 267   _scan_in_progress = _survivors->regions()->is_nonempty();
 268   _should_abort = false;
 269 }
 270 
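// Claim the next unscanned root region (currently always a survivor region).
// Returns NULL when all regions have been claimed or the scan should abort.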
 271 HeapRegion* G1CMRootRegions::claim_next() {
 272   if (_should_abort) {
 273     // If someone has set the should_abort flag, we return NULL to
 274     // force the caller to bail out of their loop.
 275     return NULL;
 276   }
 277 
 278   // Currently, only survivors can be root regions.
 279   const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
 280 
 281   int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
 282   if (claimed_index < survivor_regions->length()) {
 283     return survivor_regions->at(claimed_index);
 284   }
 285   return NULL;
 286 }
 287 
 288 uint G1CMRootRegions::num_root_regions() const {
 289   return (uint)_survivors->regions()->length();
 290 }
 291 
 292 void G1CMRootRegions::notify_scan_done() {
 293   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 294   _scan_in_progress = false;
 295   RootRegionScan_lock->notify_all();
 296 }
 297 
 298 void G1CMRootRegions::cancel_scan() {
 299   notify_scan_done();
 300 }
 301 
 302 void G1CMRootRegions::scan_finished() {
 303   assert(scan_in_progress(), "pre-condition");
 304 
 305   // Currently, only survivors can be root regions.
 306   if (!_should_abort) {
 307     assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
 308     assert((uint)_claimed_survivor_index >= _survivors->length(),
 309            "we should have claimed all survivors, claimed index = %u, length = %u",
 310            (uint)_claimed_survivor_index, _survivors->length());
 311   }
 312 
 313   notify_scan_done();
 314 }
 315 
 316 bool G1CMRootRegions::wait_until_scan_finished() {
 317   if (!scan_in_progress()) return false;
 318 
 319   {
 320     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 321     while (scan_in_progress()) {
 322       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 323     }
 324   }
 325   return true;
 326 }
 327 
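// Derive a default number of concurrent marking threads from the number of
// parallel GC threads: roughly one quarter of them, but at least one.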
 328 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 329   return MAX2((n_par_threads + 2) / 4, 1U);
 330 }
 331 
 332 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 333   _g1h(g1h),
 334   _markBitMap1(),
 335   _markBitMap2(),
 336   _parallel_marking_threads(0),
 337   _max_parallel_marking_threads(0),
 338   _sleep_factor(0.0),
 339   _marking_task_overhead(1.0),
 340   _cleanup_list("Cleanup List"),
 341 
 342   _prevMarkBitMap(&_markBitMap1),
 343   _nextMarkBitMap(&_markBitMap2),
 344 
 345   _global_mark_stack(),
 346   // _finger set in set_non_marking_state
 347 
 348   _max_worker_id(ParallelGCThreads),
 349   // _active_tasks set in set_non_marking_state
 350   // _tasks set inside the constructor
 351   _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
 352   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 353 
 354   _has_overflown(false),
 355   _concurrent(false),
 356   _has_aborted(false),
 357   _restart_for_overflow(false),
 358   _concurrent_marking_in_progress(false),
 359   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 360   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 361 
 362   // _verbose_level set below
 363 
 364   _init_times(),
 365   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 366   _cleanup_times(),
 367   _total_counting_time(0.0),
 368   _total_rs_scrub_time(0.0),
 369 
 370   _parallel_workers(NULL),
 371 
 372   _completed_initialization(false) {
 373 
 374   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 375   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 376 
 377   // Create & start a ConcurrentMark thread.
 378   _cmThread = new ConcurrentMarkThread(this);
 379   assert(cmThread() != NULL, "CM Thread should have been created");
 380   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 381   if (_cmThread->osthread() == NULL) {
 382       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 383   }
 384 
 385   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 386 
 387   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 388   satb_qs.set_buffer_size(G1SATBBufferSize);
 389 
 390   _root_regions.init(_g1h->survivor(), this);
 391 
 392   if (ConcGCThreads > ParallelGCThreads) {
 393     log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
 394                     ConcGCThreads, ParallelGCThreads);
 395     return;
 396   }
 397   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 398     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 399     // if both are set
 400     _sleep_factor             = 0.0;
 401     _marking_task_overhead    = 1.0;
 402   } else if (G1MarkingOverheadPercent > 0) {
 403     // We will calculate the number of parallel marking threads based
 404     // on a target overhead with respect to the soft real-time goal
 405     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 406     double overall_cm_overhead =
 407       (double) MaxGCPauseMillis * marking_overhead /
 408       (double) GCPauseIntervalMillis;
 409     double cpu_ratio = 1.0 / os::initial_active_processor_count();
 410     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 411     double marking_task_overhead =
 412       overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
 413     double sleep_factor =
 414                        (1.0 - marking_task_overhead) / marking_task_overhead;
 415 
 416     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 417     _sleep_factor             = sleep_factor;
 418     _marking_task_overhead    = marking_task_overhead;
 419   } else {
 420     // Calculate the number of parallel marking threads by scaling
 421     // the number of parallel GC threads.
 422     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 423     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 424     _sleep_factor             = 0.0;
 425     _marking_task_overhead    = 1.0;
 426   }
 427 
 428   assert(ConcGCThreads > 0, "Should have been set");
 429   log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
 430   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 431   _parallel_marking_threads = ConcGCThreads;
 432   _max_parallel_marking_threads = _parallel_marking_threads;
 433 
 434   _parallel_workers = new WorkGang("G1 Marker",
 435        _max_parallel_marking_threads, false, true);
 436   if (_parallel_workers == NULL) {
 437     vm_exit_during_initialization("Failed necessary allocation.");
 438   } else {
 439     _parallel_workers->initialize_workers();
 440   }
 441 
 442   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 443     size_t mark_stack_size =
 444       MIN2(MarkStackSizeMax,
 445           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 446     // Verify that the calculated value for MarkStackSize is in range.
 447     // It would be nice to use the private utility routine from Arguments.
 448     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 449       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 450                       "must be between 1 and " SIZE_FORMAT,
 451                       mark_stack_size, MarkStackSizeMax);
 452       return;
 453     }
 454     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 455   } else {
 456     // Verify MarkStackSize is in range.
 457     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 458       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 459         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 460           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 461                           "must be between 1 and " SIZE_FORMAT,
 462                           MarkStackSize, MarkStackSizeMax);
 463           return;
 464         }
 465       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 466         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 467           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 468                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 469                           MarkStackSize, MarkStackSizeMax);
 470           return;
 471         }
 472       }
 473     }
 474   }
 475 
 476   if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
 477     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 478   }
 479 
 480   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
 481   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 482 
 483   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 484   _active_tasks = _max_worker_id;
 485 
 486   for (uint i = 0; i < _max_worker_id; ++i) {
 487     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 488     task_queue->initialize();
 489     _task_queues->register_queue(i, task_queue);
 490 
 491     _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);
 492 
 493     _accum_task_vtime[i] = 0.0;
 494   }
 495 
 496   // so that the call below can read a sensible value
 497   _heap_start = g1h->reserved_region().start();
 498   set_non_marking_state();
 499   _completed_initialization = true;
 500 }
 501 
 502 void G1ConcurrentMark::reset() {
 503   // Starting values for these two. This should be called in a STW
 504   // phase.
 505   MemRegion reserved = _g1h->g1_reserved();
 506   _heap_start = reserved.start();
 507   _heap_end   = reserved.end();
 508 
 509   // Separated the asserts so that we know which one fires.
 510   assert(_heap_start != NULL, "heap bounds should look ok");
 511   assert(_heap_end != NULL, "heap bounds should look ok");
 512   assert(_heap_start < _heap_end, "heap bounds should look ok");
 513 
 514   // Reset all the marking data structures and any necessary flags
 515   reset_marking_state();
 516 
  // We reset all of them, since different phases will use a different
  // number of active threads. So, it's easiest to have all of them ready.
 520   for (uint i = 0; i < _max_worker_id; ++i) {
 521     _tasks[i]->reset(_nextMarkBitMap);
 522   }
 523 
 524   // we need this to make sure that the flag is on during the evac
 525   // pause with initial mark piggy-backed
 526   set_concurrent_marking_in_progress();
 527 }
 528 
 529 
 530 void G1ConcurrentMark::reset_marking_state() {
 531   _global_mark_stack.set_empty();
 532 
 533   // Expand the marking stack, if we have to and if we can.
 534   if (has_overflown()) {
 535     _global_mark_stack.expand();
 536   }
 537 
 538   clear_has_overflown();
 539   _finger = _heap_start;
 540 
 541   for (uint i = 0; i < _max_worker_id; ++i) {
 542     G1CMTaskQueue* queue = _task_queues->queue(i);
 543     queue->set_empty();
 544   }
 545 }
 546 
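// Record the number of tasks that will be active in the upcoming phase and
// resize the parallel task terminator and the two overflow barriers to match.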
 547 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 548   assert(active_tasks <= _max_worker_id, "we should not have more");
 549 
 550   _active_tasks = active_tasks;
 551   // Need to update the three data structures below according to the
 552   // number of active threads for this phase.
 553   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 554   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 555   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 556 }
 557 
 558 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 559   set_concurrency(active_tasks);
 560 
 561   _concurrent = concurrent;
 562   // We propagate this to all tasks, not just the active ones.
 563   for (uint i = 0; i < _max_worker_id; ++i)
 564     _tasks[i]->set_concurrent(concurrent);
 565 
 566   if (concurrent) {
 567     set_concurrent_marking_in_progress();
 568   } else {
 569     // We currently assume that the concurrent flag has been set to
 570     // false before we start remark. At this point we should also be
 571     // in a STW phase.
 572     assert(!concurrent_marking_in_progress(), "invariant");
 573     assert(out_of_regions(),
 574            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 575            p2i(_finger), p2i(_heap_end));
 576   }
 577 }
 578 
 579 void G1ConcurrentMark::set_non_marking_state() {
 580   // We set the global marking state to some default values when we're
 581   // not doing marking.
 582   reset_marking_state();
 583   _active_tasks = 0;
 584   clear_concurrent_marking_in_progress();
 585 }
 586 
 587 G1ConcurrentMark::~G1ConcurrentMark() {
 588   // The G1ConcurrentMark instance is never freed.
 589   ShouldNotReachHere();
 590 }
 591 
 592 class G1ClearBitMapTask : public AbstractGangTask {
 593 public:
 594   static size_t chunk_size() { return M; }
 595 
 596 private:
 597   // Heap region closure used for clearing the given mark bitmap.
 598   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 599   private:
 600     G1CMBitMap* _bitmap;
 601     G1ConcurrentMark* _cm;
 602   public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
    }
 605 
 606     virtual bool doHeapRegion(HeapRegion* r) {
 607       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 608 
 609       HeapWord* cur = r->bottom();
 610       HeapWord* const end = r->end();
 611 
 612       while (cur < end) {
 613         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 614         _bitmap->clear_range(mr);
 615 
 616         cur += chunk_size_in_words;
 617 
 618         // Abort iteration if after yielding the marking has been aborted.
 619         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 620           return true;
 621         }
 622         // Repeat the asserts from before the start of the closure. We will do them
 623         // as asserts here to minimize their overhead on the product. However, we
 624         // will have them as guarantees at the beginning / end of the bitmap
 625         // clearing to get some checking in the product.
 626         assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
 627         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 628       }
 629       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 630 
 631       return false;
 632     }
 633   };
 634 
 635   G1ClearBitmapHRClosure _cl;
 636   HeapRegionClaimer _hr_claimer;
 637   bool _suspendible; // If the task is suspendible, workers must join the STS.
 638 
 639 public:
 640   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 641     AbstractGangTask("G1 Clear Bitmap"),
 642     _cl(bitmap, suspendible ? cm : NULL),
 643     _hr_claimer(n_workers),
 644     _suspendible(suspendible)
 645   { }
 646 
 647   void work(uint worker_id) {
 648     SuspendibleThreadSetJoiner sts_join(_suspendible);
 649     G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
 650   }
 651 
 652   bool is_complete() {
 653     return _cl.complete();
 654   }
 655 };
 656 
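// Clear the given bitmap using the given work gang. If may_yield is true the
// clearing is done while joined to the suspendible thread set and may be cut
// short by an abort; otherwise it must run at a safepoint and is guaranteed
// to complete.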
 657 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 658   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 659 
 660   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 661   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 662 
 663   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 664 
 665   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 666 
 667   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 668   workers->run_task(&cl, num_workers);
 669   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 670 }
 671 
 672 void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
 675   guarantee(cmThread()->during_cycle(), "invariant");
 676 
 677   // We are finishing up the current cycle by clearing the next
 678   // marking bitmap and getting it ready for the next cycle. During
 679   // this time no other cycle can start. So, let's make sure that this
 680   // is the case.
 681   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 682 
 683   clear_bitmap(_nextMarkBitMap, _parallel_workers, true);
 684 
 685   // Clear the live count data. If the marking has been aborted, the abort()
 686   // call already did that.
 687   if (!has_aborted()) {
 688     clear_live_data(_parallel_workers);
 689     DEBUG_ONLY(verify_live_data_clear());
 690   }
 691 
 692   // Repeat the asserts from above.
 693   guarantee(cmThread()->during_cycle(), "invariant");
 694   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 695 }
 696 
 697 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 698   assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
 699   clear_bitmap(_prevMarkBitMap, workers, false);
 700 }
 701 
 702 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 703   G1CMBitMap* _bitmap;
 704   bool _error;
 705  public:
 706   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 707   }
 708 
 709   virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
    // that the result of the get_next_marked_addr() call is compared to the
    // value passed to it as the limit to detect any set bits.
    // end never changes in G1.
 714     HeapWord* end = r->end();
 715     return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
 716   }
 717 };
 718 
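// Returns true iff no bits are set in the next mark bitmap for any heap region.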
 719 bool G1ConcurrentMark::nextMarkBitmapIsClear() {
 720   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 721   _g1h->heap_region_iterate(&cl);
 722   return cl.complete();
 723 }
 724 
 725 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 726 public:
 727   bool doHeapRegion(HeapRegion* r) {
 728     r->note_start_of_marking();
 729     return false;
 730   }
 731 };
 732 
 733 void G1ConcurrentMark::checkpointRootsInitialPre() {
 734   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 735 
 736   _has_aborted = false;
 737 
 738   // Initialize marking structures. This has to be done in a STW phase.
 739   reset();
 740 
 741   // For each region note start of marking.
 742   NoteStartOfMarkHRClosure startcl;
 743   g1h->heap_region_iterate(&startcl);
 744 }
 745 
 746 
 747 void G1ConcurrentMark::checkpointRootsInitialPost() {
 748   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 749 
 750   // Start Concurrent Marking weak-reference discovery.
 751   ReferenceProcessor* rp = g1h->ref_processor_cm();
 752   // enable ("weak") refs discovery
 753   rp->enable_discovery();
 754   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 755 
 756   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 759   satb_mq_set.set_active_all_threads(true, /* new active value */
 760                                      false /* expected_active */);
 761 
 762   _root_regions.prepare_for_scan();
 763 
 764   // update_g1_committed() will be called at the end of an evac pause
 765   // when marking is on. So, it's also called at the end of the
 766   // initial-mark pause to update the heap end, if the heap expands
 767   // during it. No need to call it here.
 768 }
 769 
 770 /*
 771  * Notice that in the next two methods, we actually leave the STS
 772  * during the barrier sync and join it immediately afterwards. If we
 773  * do not do this, the following deadlock can occur: one thread could
 774  * be in the barrier sync code, waiting for the other thread to also
 775  * sync up, whereas another one could be trying to yield, while also
 776  * waiting for the other threads to sync up too.
 777  *
 778  * Note, however, that this code is also used during remark and in
 779  * this case we should not attempt to leave / enter the STS, otherwise
 780  * we'll either hit an assert (debug / fastdebug) or deadlock
 781  * (product). So we should only leave / enter the STS if we are
 782  * operating concurrently.
 783  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an evacuation
 * pause to occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 789  */
 790 
 791 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 792   bool barrier_aborted;
 793   {
 794     SuspendibleThreadSetLeaver sts_leave(concurrent());
 795     barrier_aborted = !_first_overflow_barrier_sync.enter();
 796   }
 797 
 798   // at this point everyone should have synced up and not be doing any
 799   // more work
 800 
 801   if (barrier_aborted) {
 802     // If the barrier aborted we ignore the overflow condition and
 803     // just abort the whole marking phase as quickly as possible.
 804     return;
 805   }
 806 
 807   // If we're executing the concurrent phase of marking, reset the marking
 808   // state; otherwise the marking state is reset after reference processing,
 809   // during the remark pause.
 810   // If we reset here as a result of an overflow during the remark we will
 811   // see assertion failures from any subsequent set_concurrency_and_phase()
 812   // calls.
 813   if (concurrent()) {
    // let the task associated with worker 0 do this
 815     if (worker_id == 0) {
 816       // task 0 is responsible for clearing the global data structures
 817       // We should be here because of an overflow. During STW we should
 818       // not clear the overflow flag since we rely on it being true when
 819       // we exit this method to abort the pause and restart concurrent
 820       // marking.
 821       reset_marking_state();
 822 
 823       log_info(gc, marking)("Concurrent Mark reset for overflow");
 824     }
 825   }
 826 
  // after this, each task should reset its own data structures and
  // then go into the second barrier
 829 }
 830 
 831 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 832   SuspendibleThreadSetLeaver sts_leave(concurrent());
 833   _second_overflow_barrier_sync.enter();
 834 
 835   // at this point everything should be re-initialized and ready to go
 836 }
 837 
 838 class G1CMConcurrentMarkingTask: public AbstractGangTask {
 839 private:
 840   G1ConcurrentMark*     _cm;
 841   ConcurrentMarkThread* _cmt;
 842 
 843 public:
 844   void work(uint worker_id) {
 845     assert(Thread::current()->is_ConcurrentGC_thread(),
 846            "this should only be done by a conc GC thread");
 847     ResourceMark rm;
 848 
 849     double start_vtime = os::elapsedVTime();
 850 
 851     {
 852       SuspendibleThreadSetJoiner sts_join;
 853 
 854       assert(worker_id < _cm->active_tasks(), "invariant");
 855       G1CMTask* the_task = _cm->task(worker_id);
 856       the_task->record_start_time();
 857       if (!_cm->has_aborted()) {
 858         do {
 859           double start_vtime_sec = os::elapsedVTime();
 860           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 861 
 862           the_task->do_marking_step(mark_step_duration_ms,
 863                                     true  /* do_termination */,
 864                                     false /* is_serial*/);
 865 
 866           double end_vtime_sec = os::elapsedVTime();
 867           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 868           _cm->do_yield_check();
 869 
 870           jlong sleep_time_ms;
 871           if (!_cm->has_aborted() && the_task->has_aborted()) {
 872             sleep_time_ms =
 873               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 874             {
 875               SuspendibleThreadSetLeaver sts_leave;
 876               os::sleep(Thread::current(), sleep_time_ms, false);
 877             }
 878           }
 879         } while (!_cm->has_aborted() && the_task->has_aborted());
 880       }
 881       the_task->record_end_time();
 882       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 883     }
 884 
 885     double end_vtime = os::elapsedVTime();
 886     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 887   }
 888 
 889   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
 890                             ConcurrentMarkThread* cmt) :
 891       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 892 
 893   ~G1CMConcurrentMarkingTask() { }
 894 };
 895 
 896 // Calculates the number of active workers for a concurrent
 897 // phase.
 898 uint G1ConcurrentMark::calc_parallel_marking_threads() {
 899   uint n_conc_workers = 0;
 900   if (!UseDynamicNumberOfGCThreads ||
 901       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 902        !ForceDynamicNumberOfGCThreads)) {
 903     n_conc_workers = max_parallel_marking_threads();
 904   } else {
 905     n_conc_workers =
 906       AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
 907                                                       1, /* Minimum workers */
 908                                                       parallel_marking_threads(),
 909                                                       Threads::number_of_non_daemon_threads());
 910     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 911     // that scaling has already gone into "_max_parallel_marking_threads".
 912   }
 913   assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
 914          "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
 915          max_parallel_marking_threads(), n_conc_workers);
 916   return n_conc_workers;
 917 }
 918 
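// Scan a single root region by applying G1RootRegionScanClosure to every
// object between the region's bottom and its top.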
 919 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
 920   // Currently, only survivors can be root regions.
 921   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 922   G1RootRegionScanClosure cl(_g1h, this);
 923 
 924   const uintx interval = PrefetchScanIntervalInBytes;
 925   HeapWord* curr = hr->bottom();
 926   const HeapWord* end = hr->top();
 927   while (curr < end) {
 928     Prefetch::read(curr, interval);
 929     oop obj = oop(curr);
 930     int size = obj->oop_iterate_size(&cl);
 931     assert(size == obj->size(), "sanity");
 932     curr += size;
 933   }
 934 }
 935 
 936 class G1CMRootRegionScanTask : public AbstractGangTask {
 937 private:
 938   G1ConcurrentMark* _cm;
 939 
 940 public:
 941   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 942     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
 943 
 944   void work(uint worker_id) {
 945     assert(Thread::current()->is_ConcurrentGC_thread(),
 946            "this should only be done by a conc GC thread");
 947 
 948     G1CMRootRegions* root_regions = _cm->root_regions();
 949     HeapRegion* hr = root_regions->claim_next();
 950     while (hr != NULL) {
 951       _cm->scanRootRegion(hr);
 952       hr = root_regions->claim_next();
 953     }
 954   }
 955 };
 956 
 957 void G1ConcurrentMark::scan_root_regions() {
 958   // scan_in_progress() will have been set to true only if there was
 959   // at least one root region to scan. So, if it's false, we
 960   // should not attempt to do any further work.
 961   if (root_regions()->scan_in_progress()) {
 962     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 963 
 964     _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
 965                                      // We distribute work on a per-region basis, so starting
 966                                      // more threads than that is useless.
 967                                      root_regions()->num_root_regions());
 968     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
 969            "Maximum number of marking threads exceeded");
 970 
 971     G1CMRootRegionScanTask task(this);
 972     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
 973                         task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
 974     _parallel_workers->run_task(&task, _parallel_marking_threads);
 975 
 976     // It's possible that has_aborted() is true here without actually
 977     // aborting the survivor scan earlier. This is OK as it's
 978     // mainly used for sanity checking.
 979     root_regions()->scan_finished();
 980   }
 981 }
 982 
 983 void G1ConcurrentMark::concurrent_cycle_start() {
 984   _gc_timer_cm->register_gc_start();
 985 
 986   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
 987 
 988   _g1h->trace_heap_before_gc(_gc_tracer_cm);
 989 }
 990 
 991 void G1ConcurrentMark::concurrent_cycle_end() {
 992   _g1h->trace_heap_after_gc(_gc_tracer_cm);
 993 
 994   if (has_aborted()) {
 995     _gc_tracer_cm->report_concurrent_mode_failure();
 996   }
 997 
 998   _gc_timer_cm->register_gc_end();
 999 
1000   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1001 }
1002 
1003 void G1ConcurrentMark::mark_from_roots() {
1004   // we might be tempted to assert that:
1005   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1006   //        "inconsistent argument?");
1007   // However that wouldn't be right, because it's possible that
1008   // a safepoint is indeed in progress as a younger generation
1009   // stop-the-world GC happens even as we mark in this generation.
1010 
1011   _restart_for_overflow = false;
1012 
1013   // _g1h has _n_par_threads
1014   _parallel_marking_threads = calc_parallel_marking_threads();
1015   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1016     "Maximum number of marking threads exceeded");
1017 
1018   uint active_workers = MAX2(1U, parallel_marking_threads());
1019   assert(active_workers > 0, "Should have been set");
1020 
1021   // Setting active workers is not guaranteed since fewer
1022   // worker threads may currently exist and more may not be
1023   // available.
1024   active_workers = _parallel_workers->update_active_workers(active_workers);
1025   log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());
1026 
1027   // Parallel task terminator is set in "set_concurrency_and_phase()"
1028   set_concurrency_and_phase(active_workers, true /* concurrent */);
1029 
1030   G1CMConcurrentMarkingTask markingTask(this, cmThread());
1031   _parallel_workers->run_task(&markingTask);
1032   print_stats();
1033 }
1034 
1035 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1036   // world is stopped at this checkpoint
1037   assert(SafepointSynchronize::is_at_safepoint(),
1038          "world should be stopped");
1039 
1040   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1041 
1042   // If a full collection has happened, we shouldn't do this.
1043   if (has_aborted()) {
1044     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1045     return;
1046   }
1047 
1048   if (VerifyDuringGC) {
1049     HandleMark hm;  // handle scope
1050     g1h->prepare_for_verify();
1051     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1052   }
1053   g1h->verifier()->check_bitmaps("Remark Start");
1054 
1055   G1Policy* g1p = g1h->g1_policy();
1056   g1p->record_concurrent_mark_remark_start();
1057 
1058   double start = os::elapsedTime();
1059 
1060   checkpointRootsFinalWork();
1061 
1062   double mark_work_end = os::elapsedTime();
1063 
1064   weakRefsWork(clear_all_soft_refs);
1065 
1066   if (has_overflown()) {
1067     // We overflowed.  Restart concurrent marking.
1068     _restart_for_overflow = true;
1069 
1070     // Verify the heap w.r.t. the previous marking bitmap.
1071     if (VerifyDuringGC) {
1072       HandleMark hm;  // handle scope
1073       g1h->prepare_for_verify();
1074       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1075     }
1076 
1077     // Clear the marking state because we will be restarting
1078     // marking due to overflowing the global mark stack.
1079     reset_marking_state();
1080   } else {
1081     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1082     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1085     satb_mq_set.set_active_all_threads(false, /* new active value */
1086                                        true /* expected_active */);
1087 
1088     if (VerifyDuringGC) {
1089       HandleMark hm;  // handle scope
1090       g1h->prepare_for_verify();
1091       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1092     }
1093     g1h->verifier()->check_bitmaps("Remark End");
1094     assert(!restart_for_overflow(), "sanity");
1095     // Completely reset the marking state since marking completed
1096     set_non_marking_state();
1097   }
1098 
1099   // Statistics
1100   double now = os::elapsedTime();
1101   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1102   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1103   _remark_times.add((now - start) * 1000.0);
1104 
1105   g1p->record_concurrent_mark_remark_end();
1106 
1107   G1CMIsAliveClosure is_alive(g1h);
1108   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1109 }
1110 
1111 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1112   G1CollectedHeap* _g1;
1113   size_t _freed_bytes;
1114   FreeRegionList* _local_cleanup_list;
1115   uint _old_regions_removed;
1116   uint _humongous_regions_removed;
1117   HRRSCleanupTask* _hrrs_cleanup_task;
1118 
1119 public:
1120   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1121                              FreeRegionList* local_cleanup_list,
1122                              HRRSCleanupTask* hrrs_cleanup_task) :
1123     _g1(g1),
1124     _freed_bytes(0),
1125     _local_cleanup_list(local_cleanup_list),
1126     _old_regions_removed(0),
1127     _humongous_regions_removed(0),
1128     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1129 
1130   size_t freed_bytes() { return _freed_bytes; }
1131   const uint old_regions_removed() { return _old_regions_removed; }
1132   const uint humongous_regions_removed() { return _humongous_regions_removed; }
1133 
1134   bool doHeapRegion(HeapRegion *hr) {
1135     _g1->reset_gc_time_stamps(hr);
1136     hr->note_end_of_marking();
1137 
1138     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
1139       _freed_bytes += hr->used();
1140       hr->set_containing_set(NULL);
1141       if (hr->is_humongous()) {
1142         _humongous_regions_removed++;
1143         _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
1144       } else {
1145         _old_regions_removed++;
1146         _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
1147       }
1148     } else {
1149       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1150     }
1151 
1152     return false;
1153   }
1154 };
1155 
1156 class G1ParNoteEndTask: public AbstractGangTask {
1157   friend class G1NoteEndOfConcMarkClosure;
1158 
1159 protected:
1160   G1CollectedHeap* _g1h;
1161   FreeRegionList* _cleanup_list;
1162   HeapRegionClaimer _hrclaimer;
1163 
1164 public:
1165   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1166       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1167   }
1168 
1169   void work(uint worker_id) {
1170     FreeRegionList local_cleanup_list("Local Cleanup List");
1171     HRRSCleanupTask hrrs_cleanup_task;
1172     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1173                                            &hrrs_cleanup_task);
1174     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1175     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1176 
1177     // Now update the lists
1178     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1179     {
1180       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1181       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1182 
1183       // If we iterate over the global cleanup list at the end of
1184       // cleanup to do this printing we will not guarantee to only
1185       // generate output for the newly-reclaimed regions (the list
1186       // might not be empty at the beginning of cleanup; we might
1187       // still be working on its previous contents). So we do the
1188       // printing here, before we append the new regions to the global
1189       // cleanup list.
1190 
1191       G1HRPrinter* hr_printer = _g1h->hr_printer();
1192       if (hr_printer->is_active()) {
1193         FreeRegionListIterator iter(&local_cleanup_list);
1194         while (iter.more_available()) {
1195           HeapRegion* hr = iter.get_next();
1196           hr_printer->cleanup(hr);
1197         }
1198       }
1199 
1200       _cleanup_list->add_ordered(&local_cleanup_list);
1201       assert(local_cleanup_list.is_empty(), "post-condition");
1202 
1203       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1204     }
1205   }
1206 };
1207 
1208 void G1ConcurrentMark::cleanup() {
1209   // world is stopped at this checkpoint
1210   assert(SafepointSynchronize::is_at_safepoint(),
1211          "world should be stopped");
1212   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1213 
1214   // If a full collection has happened, we shouldn't do this.
1215   if (has_aborted()) {
1216     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1217     return;
1218   }
1219 
1220   g1h->verifier()->verify_region_sets_optional();
1221 
1222   if (VerifyDuringGC) {
1223     HandleMark hm;  // handle scope
1224     g1h->prepare_for_verify();
1225     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1226   }
1227   g1h->verifier()->check_bitmaps("Cleanup Start");
1228 
1229   G1Policy* g1p = g1h->g1_policy();
1230   g1p->record_concurrent_mark_cleanup_start();
1231 
1232   double start = os::elapsedTime();
1233 
1234   HeapRegionRemSet::reset_for_cleanup_tasks();
1235 
1236   {
1237     GCTraceTime(Debug, gc)("Finalize Live Data");
1238     finalize_live_data();
1239   }
1240 
1241   if (VerifyDuringGC) {
1242     GCTraceTime(Debug, gc)("Verify Live Data");
1243     verify_live_data();
1244   }
1245 
1246   g1h->collector_state()->set_mark_in_progress(false);
1247 
1248   double count_end = os::elapsedTime();
1249   double this_final_counting_time = (count_end - start);
1250   _total_counting_time += this_final_counting_time;
1251 
1252   if (log_is_enabled(Trace, gc, liveness)) {
1253     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1254     _g1h->heap_region_iterate(&cl);
1255   }
1256 
1257   // Install newly created mark bitMap as "prev".
1258   swapMarkBitMaps();
1259 
1260   g1h->reset_gc_time_stamp();
1261 
1262   uint n_workers = _g1h->workers()->active_workers();
1263 
1264   // Note end of marking in all heap regions.
1265   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1266   g1h->workers()->run_task(&g1_par_note_end_task);
1267   g1h->check_gc_time_stamps();
1268 
1269   if (!cleanup_list_is_empty()) {
1270     // The cleanup list is not empty, so we'll have to process it
1271     // concurrently. Notify anyone else that might be wanting free
1272     // regions that there will be more free regions coming soon.
1273     g1h->set_free_regions_coming();
1274   }
1275 
  // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
  // call below, since scrubbing affects the metric by which we sort the heap
  // regions.
1278   if (G1ScrubRemSets) {
1279     double rs_scrub_start = os::elapsedTime();
1280     g1h->scrub_rem_set();
1281     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1282   }
1283 
1284   // this will also free any regions totally full of garbage objects,
1285   // and sort the regions.
1286   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1287 
1288   // Statistics.
1289   double end = os::elapsedTime();
1290   _cleanup_times.add((end - start) * 1000.0);
1291 
1292   // Clean up will have freed any regions completely full of garbage.
1293   // Update the soft reference policy with the new heap occupancy.
1294   Universe::update_heap_info_at_gc();
1295 
1296   if (VerifyDuringGC) {
1297     HandleMark hm;  // handle scope
1298     g1h->prepare_for_verify();
1299     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1300   }
1301 
1302   g1h->verifier()->check_bitmaps("Cleanup End");
1303 
1304   g1h->verifier()->verify_region_sets_optional();
1305 
  // We need to make this a "collection" so any collection pause that
  // races with it goes around and waits for complete_cleanup() to finish.
1308   g1h->increment_total_collections();
1309 
1310   // Clean out dead classes and update Metaspace sizes.
1311   if (ClassUnloadingWithConcurrentMark) {
1312     ClassLoaderDataGraph::purge();
1313   }
1314   MetaspaceGC::compute_new_size();
1315 
1316   // We reclaimed old regions so we should calculate the sizes to make
1317   // sure we update the old gen/space data.
1318   g1h->g1mm()->update_sizes();
1319   g1h->allocation_context_stats().update_after_mark();
1320 }
1321 
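// Concurrently free the regions gathered on the cleanup list, moving them to
// the secondary free list in batches of G1SecondaryFreeListAppendLength to
// limit the number of lock acquisitions and notify_all() calls.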
1322 void G1ConcurrentMark::complete_cleanup() {
1323   if (has_aborted()) return;
1324 
1325   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1326 
1327   _cleanup_list.verify_optional();
1328   FreeRegionList tmp_free_list("Tmp Free List");
1329 
1330   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1331                                   "cleanup list has %u entries",
1332                                   _cleanup_list.length());
1333 
1334   // No one else should be accessing the _cleanup_list at this point,
1335   // so it is not necessary to take any locks
1336   while (!_cleanup_list.is_empty()) {
1337     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1338     assert(hr != NULL, "Got NULL from a non-empty list");
1339     hr->par_clear();
1340     tmp_free_list.add_ordered(hr);
1341 
1342     // Instead of adding one region at a time to the secondary_free_list,
1343     // we accumulate them in the local list and move them a few at a
1344     // time. This also cuts down on the number of notify_all() calls
1345     // we do during this process. We'll also append the local list when
1346     // _cleanup_list is empty (which means we just removed the last
1347     // region from the _cleanup_list).
1348     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1349         _cleanup_list.is_empty()) {
1350       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1351                                       "appending %u entries to the secondary_free_list, "
1352                                       "cleanup list still has %u entries",
1353                                       tmp_free_list.length(),
1354                                       _cleanup_list.length());
1355 
1356       {
1357         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1358         g1h->secondary_free_list_add(&tmp_free_list);
1359         SecondaryFreeList_lock->notify_all();
1360       }
1361 #ifndef PRODUCT
1362       if (G1StressConcRegionFreeing) {
1363         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1364           os::sleep(Thread::current(), (jlong) 1, false);
1365         }
1366       }
1367 #endif
1368     }
1369   }
1370   assert(tmp_free_list.is_empty(), "post-condition");
1371 }
1372 
1373 // Supporting Object and Oop closures for reference discovery
// and processing during marking.
1375 
1376 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1377   HeapWord* addr = (HeapWord*)obj;
1378   return addr != NULL &&
1379          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1380 }
1381 
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1383 // Uses the G1CMTask associated with a worker thread (for serial reference
1384 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1385 // trace referent objects.
1386 //
1387 // Using the G1CMTask and embedded local queues avoids having the worker
1388 // threads operating on the global mark stack. This reduces the risk
1389 // of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
1391 // of the workers interfering with each other that could occur if
1392 // operating on the global stack.
1393 
1394 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1395   G1ConcurrentMark* _cm;
1396   G1CMTask*         _task;
1397   int               _ref_counter_limit;
1398   int               _ref_counter;
1399   bool              _is_serial;
1400  public:
1401   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1402     _cm(cm), _task(task), _is_serial(is_serial),
1403     _ref_counter_limit(G1RefProcDrainInterval) {
1404     assert(_ref_counter_limit > 0, "sanity");
1405     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1406     _ref_counter = _ref_counter_limit;
1407   }
1408 
1409   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1410   virtual void do_oop(      oop* p) { do_oop_work(p); }
1411 
1412   template <class T> void do_oop_work(T* p) {
1413     if (!_cm->has_overflown()) {
1414       oop obj = oopDesc::load_decode_heap_oop(p);
1415       _task->deal_with_reference(obj);
1416       _ref_counter--;
1417 
1418       if (_ref_counter == 0) {
1419         // We have dealt with _ref_counter_limit references, pushing them
1420         // and objects reachable from them on to the local stack (and
1421         // possibly the global stack). Call G1CMTask::do_marking_step() to
1422         // process these entries.
1423         //
1424         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1425         // there's nothing more to do (i.e. we're done with the entries that
1426         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1427         // above) or we overflow.
1428         //
1429         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1430         // flag while there may still be some work to do. (See the comment at
1431         // the beginning of G1CMTask::do_marking_step() for those conditions -
1432         // one of which is reaching the specified time target.) It is only
1433         // when G1CMTask::do_marking_step() returns without setting the
1434         // has_aborted() flag that the marking step has completed.
1435         do {
1436           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1437           _task->do_marking_step(mark_step_duration_ms,
1438                                  false      /* do_termination */,
1439                                  _is_serial);
1440         } while (_task->has_aborted() && !_cm->has_overflown());
1441         _ref_counter = _ref_counter_limit;
1442       }
1443     }
1444   }
1445 };
1446 
1447 // 'Drain' closure used by both serial and parallel reference processing.
1448 // Uses the G1CMTask associated with a given worker thread (for serial
1449 // reference processing the G1CMTask for worker 0 is used). Calls the
1450 // do_marking_step routine, with an unbelievably large timeout value,
1451 // to drain the marking data structures of the remaining entries
1452 // added by the 'keep alive' oop closure above.
1453 
1454 class G1CMDrainMarkingStackClosure: public VoidClosure {
1455   G1ConcurrentMark* _cm;
1456   G1CMTask*         _task;
1457   bool              _is_serial;
1458  public:
1459   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1460     _cm(cm), _task(task), _is_serial(is_serial) {
1461     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1462   }
1463 
1464   void do_void() {
1465     do {
1466       // We call G1CMTask::do_marking_step() to completely drain the local
1467       // and global marking stacks of entries pushed by the 'keep alive'
1468       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1469       //
1470       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1471       // if there's nothing more to do (i.e. we've completely drained the
1472       // entries that were pushed as a result of applying the 'keep alive'
1473       // closure to the entries on the discovered ref lists) or we overflow
1474       // the global marking stack.
1475       //
1476       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1477       // flag while there may still be some work to do. (See the comment at
1478       // the beginning of G1CMTask::do_marking_step() for those conditions -
1479       // one of which is reaching the specified time target.) It is only
1480       // when G1CMTask::do_marking_step() returns without setting the
1481       // has_aborted() flag that the marking step has completed.
1482 
1483       _task->do_marking_step(1000000000.0 /* something very large */,
1484                              true         /* do_termination */,
1485                              _is_serial);
1486     } while (_task->has_aborted() && !_cm->has_overflown());
1487   }
1488 };
1489 
1490 // Implementation of AbstractRefProcTaskExecutor for parallel
1491 // reference processing at the end of G1 concurrent marking
1492 
1493 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1494 private:
1495   G1CollectedHeap*  _g1h;
1496   G1ConcurrentMark* _cm;
1497   WorkGang*         _workers;
1498   uint              _active_workers;
1499 
1500 public:
1501   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1502                           G1ConcurrentMark* cm,
1503                           WorkGang* workers,
1504                           uint n_workers) :
1505     _g1h(g1h), _cm(cm),
1506     _workers(workers), _active_workers(n_workers) { }
1507 
1508   // Executes the given task using concurrent marking worker threads.
1509   virtual void execute(ProcessTask& task);
1510   virtual void execute(EnqueueTask& task);
1511 };
1512 
1513 class G1CMRefProcTaskProxy: public AbstractGangTask {
1514   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1515   ProcessTask&      _proc_task;
1516   G1CollectedHeap*  _g1h;
1517   G1ConcurrentMark* _cm;
1518 
1519 public:
1520   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1521                        G1CollectedHeap* g1h,
1522                        G1ConcurrentMark* cm) :
1523     AbstractGangTask("Process reference objects in parallel"),
1524     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1525     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1526     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1527   }
1528 
1529   virtual void work(uint worker_id) {
1530     ResourceMark rm;
1531     HandleMark hm;
1532     G1CMTask* task = _cm->task(worker_id);
1533     G1CMIsAliveClosure g1_is_alive(_g1h);
1534     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1535     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1536 
1537     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1538   }
1539 };
1540 
1541 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1542   assert(_workers != NULL, "Need parallel worker threads.");
1543   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1544 
1545   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1546 
1547   // We need to reset the concurrency level before each
1548   // proxy task execution, so that the termination protocol
1549   // and overflow handling in G1CMTask::do_marking_step() knows
1550   // how many workers to wait for.
1551   _cm->set_concurrency(_active_workers);
1552   _workers->run_task(&proc_task_proxy);
1553 }
1554 
1555 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1556   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1557   EnqueueTask& _enq_task;
1558 
1559 public:
1560   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1561     AbstractGangTask("Enqueue reference objects in parallel"),
1562     _enq_task(enq_task) { }
1563 
1564   virtual void work(uint worker_id) {
1565     _enq_task.work(worker_id);
1566   }
1567 };
1568 
1569 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1570   assert(_workers != NULL, "Need parallel worker threads.");
1571   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1572 
1573   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1574 
1575   // Not strictly necessary but...
1576   //
1577   // We need to reset the concurrency level before each
1578   // proxy task execution, so that the termination protocol
1579   // and overflow handling in G1CMTask::do_marking_step() knows
1580   // how many workers to wait for.
1581   _cm->set_concurrency(_active_workers);
1582   _workers->run_task(&enq_task_proxy);
1583 }
1584 
1585 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1586   if (has_overflown()) {
1587     // Skip processing the discovered references if we have
1588     // overflown the global marking stack. Reference objects
1589     // only get discovered once so it is OK to not
1590     // de-populate the discovered reference lists. We could have done so,
1591     // but the only benefit would be that, when marking restarts,
1592     // fewer reference objects are discovered.
1593     return;
1594   }
1595 
1596   ResourceMark rm;
1597   HandleMark   hm;
1598 
1599   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1600 
1601   // Is alive closure.
1602   G1CMIsAliveClosure g1_is_alive(g1h);
1603 
1604   // Inner scope to exclude the cleaning of the string and symbol
1605   // tables from the displayed time.
1606   {
1607     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1608 
1609     ReferenceProcessor* rp = g1h->ref_processor_cm();
1610 
1611     // See the comment in G1CollectedHeap::ref_processing_init()
1612     // about how reference processing currently works in G1.
1613 
1614     // Set the soft reference policy
1615     rp->setup_policy(clear_all_soft_refs);
1616     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1617 
1618     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1619     // in serial reference processing. Note these closures are also
1620     // used for serially processing (by the current thread) the
1621     // JNI references during parallel reference processing.
1622     //
1623     // These closures do not need to synchronize with the worker
1624     // threads involved in parallel reference processing as these
1625     // instances are executed serially by the current thread (e.g.
1626     // reference processing is not multi-threaded and is thus
1627     // performed by the current thread instead of a gang worker).
1628     //
1629     // The gang tasks involved in parallel reference processing create
1630     // their own instances of these closures, which do their own
1631     // synchronization among themselves.
1632     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1633     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1634 
1635     // We need at least one active thread. If reference processing
1636     // is not multi-threaded we use the current (VMThread) thread,
1637     // otherwise we use the work gang from the G1CollectedHeap and
1638     // we utilize all the worker threads we can.
1639     bool processing_is_mt = rp->processing_is_mt();
1640     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
1641     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
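         // Clamp the worker count to the range [1, _max_worker_id] so that we
         // never use more threads than there are marking tasks.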
1642 
1643     // Parallel processing task executor.
1644     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
1645                                               g1h->workers(), active_workers);
1646     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
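         // When reference processing is not multi-threaded the executor is left
         // NULL and the reference processor runs serially, using the serial
         // closures created above.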
1647 
1648     // Set the concurrency level. The phase was already set prior to
1649     // executing the remark task.
1650     set_concurrency(active_workers);
1651 
1652     // Set the degree of MT processing here.  If the discovery was done MT,
1653     // the number of threads involved during discovery could differ from
1654     // the number of active workers.  This is OK as long as the discovered
1655     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1656     rp->set_active_mt_degree(active_workers);
1657 
1658     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1659 
1660     // Process the weak references.
1661     const ReferenceProcessorStats& stats =
1662         rp->process_discovered_references(&g1_is_alive,
1663                                           &g1_keep_alive,
1664                                           &g1_drain_mark_stack,
1665                                           executor,
1666                                           &pt);
1667     _gc_tracer_cm->report_gc_reference_stats(stats);
1668     pt.print_all_references();
1669 
1670     // The do_oop work routines of the keep_alive and drain_marking_stack
1671     // oop closures will set the has_overflown flag if we overflow the
1672     // global marking stack.
1673 
1674     assert(has_overflown() || _global_mark_stack.is_empty(),
1675             "Mark stack should be empty (unless it has overflown)");
1676 
1677     assert(rp->num_q() == active_workers, "why not");
1678 
1679     rp->enqueue_discovered_references(executor, &pt);
1680 
1681     rp->verify_no_references_recorded();
1682 
1683     pt.print_enqueue_phase();
1684 
1685     assert(!rp->discovery_enabled(), "Post condition");
1686   }
1687 
1688   if (has_overflown()) {
1689     // We cannot trust g1_is_alive if the marking stack overflowed
1690     return;
1691   }
1692 
1693   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1694 
1695   // Unload Klasses, String, Symbols, Code Cache, etc.
1696   if (ClassUnloadingWithConcurrentMark) {
1697     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1698     bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1699     g1h->complete_cleaning(&g1_is_alive, purged_classes);
1700   } else {
1701     GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1702     // No need to clean string table and symbol table as they are treated as strong roots when
1703     // class unloading is disabled.
1704     g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1705 
1706   }
1707 }
1708 
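     // Swap the roles of the two marking bitmaps: the "next" bitmap, which has
     // just been populated by this marking cycle, becomes the "prev" bitmap used
     // for liveness queries, and the old "prev" bitmap will be cleared and
     // reused as the "next" bitmap for the following cycle.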
1709 void G1ConcurrentMark::swapMarkBitMaps() {
1710   G1CMBitMap* temp = _prevMarkBitMap;
1711   _prevMarkBitMap  = _nextMarkBitMap;
1712   _nextMarkBitMap  = temp;
1713 }
1714 
1715 // Closure for marking entries in SATB buffers.
1716 class G1CMSATBBufferClosure : public SATBBufferClosure {
1717 private:
1718   G1CMTask* _task;
1719   G1CollectedHeap* _g1h;
1720 
1721   // This is very similar to G1CMTask::deal_with_reference, but with
1722   // more relaxed requirements for the argument, so this must be more
1723   // circumspect about treating the argument as an object.
1724   void do_entry(void* entry) const {
1725     _task->increment_refs_reached();
1726     oop const obj = static_cast<oop>(entry);
1727     _task->make_reference_grey(obj);
1728   }
1729 
1730 public:
1731   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1732     : _task(task), _g1h(g1h) { }
1733 
1734   virtual void do_buffer(void** buffer, size_t size) {
1735     for (size_t i = 0; i < size; ++i) {
1736       do_entry(buffer[i]);
1737     }
1738   }
1739 };
1740 
1741 class G1RemarkThreadsClosure : public ThreadClosure {
1742   G1CMSATBBufferClosure _cm_satb_cl;
1743   G1CMOopClosure _cm_cl;
1744   MarkingCodeBlobClosure _code_cl;
1745   int _thread_parity;
1746 
1747  public:
1748   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1749     _cm_satb_cl(task, g1h),
1750     _cm_cl(g1h, g1h->concurrent_mark(), task),
1751     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1752     _thread_parity(Threads::thread_claim_parity()) {}
1753 
1754   void do_thread(Thread* thread) {
1755     if (thread->is_Java_thread()) {
1756       if (thread->claim_oops_do(true, _thread_parity)) {
1757         JavaThread* jt = (JavaThread*)thread;
1758 
1759         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1760         // however, oops reachable from nmethods have very complex lifecycles:
1761         // * Alive if on the stack of an executing method
1762         // * Weakly reachable otherwise
1763         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1764         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1765         jt->nmethods_do(&_code_cl);
1766 
1767         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1768       }
1769     } else if (thread->is_VM_thread()) {
1770       if (thread->claim_oops_do(true, _thread_parity)) {
1771         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1772       }
1773     }
1774   }
1775 };
1776 
1777 class G1CMRemarkTask: public AbstractGangTask {
1778 private:
1779   G1ConcurrentMark* _cm;
1780 public:
1781   void work(uint worker_id) {
1782     // Since all available tasks are actually started, we should
1783     // only proceed if we're supposed to be active.
1784     if (worker_id < _cm->active_tasks()) {
1785       G1CMTask* task = _cm->task(worker_id);
1786       task->record_start_time();
1787       {
1788         ResourceMark rm;
1789         HandleMark hm;
1790 
1791         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1792         Threads::threads_do(&threads_f);
1793       }
1794 
1795       do {
1796         task->do_marking_step(1000000000.0 /* something very large */,
1797                               true         /* do_termination       */,
1798                               false        /* is_serial            */);
1799       } while (task->has_aborted() && !_cm->has_overflown());
1800       // If we overflow, then we do not want to restart. We instead
1801       // want to abort remark and do concurrent marking again.
1802       task->record_end_time();
1803     }
1804   }
1805 
1806   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1807     AbstractGangTask("Par Remark"), _cm(cm) {
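         // Reset the termination protocol for the number of workers that will
         // actually run the remark task.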
1808     _cm->terminator()->reset_for_reuse(active_workers);
1809   }
1810 };
1811 
1812 void G1ConcurrentMark::checkpointRootsFinalWork() {
1813   ResourceMark rm;
1814   HandleMark   hm;
1815   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1816 
1817   GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1818 
1819   g1h->ensure_parsability(false);
1820 
1821   // this is remark, so we'll use up all active threads
1822   uint active_workers = g1h->workers()->active_workers();
1823   set_concurrency_and_phase(active_workers, false /* concurrent */);
1824   // Leave _parallel_marking_threads at its
1825   // value originally calculated in the G1ConcurrentMark
1826   // constructor and pass the values of the active workers
1827   // through the gang in the task.
1828 
1829   {
1830     StrongRootsScope srs(active_workers);
1831 
1832     G1CMRemarkTask remarkTask(this, active_workers);
1833     // We will start all available threads, even if we decide that the
1834     // active_workers will be fewer. The extra ones will just bail out
1835     // immediately.
1836     g1h->workers()->run_task(&remarkTask);
1837   }
1838 
1839   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1840   guarantee(has_overflown() ||
1841             satb_mq_set.completed_buffers_num() == 0,
1842             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1843             BOOL_TO_STR(has_overflown()),
1844             satb_mq_set.completed_buffers_num());
1845 
1846   print_stats();
1847 }
1848 
1849 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
1850   _prevMarkBitMap->clear_range(mr);
1851 }
1852 
1853 HeapRegion*
1854 G1ConcurrentMark::claim_region(uint worker_id) {
1855   // "checkpoint" the finger
1856   HeapWord* finger = _finger;
1857 
1858   // _heap_end will not change underneath our feet; it only changes at
1859   // yield points.
1860   while (finger < _heap_end) {
1861     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1862 
1863     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1864     // Make sure that the reads below do not float before loading curr_region.
1865     OrderAccess::loadload();
1866     // Above heap_region_containing may return NULL as we always scan and claim
1867     // regions up to the end of the heap. In this case, just jump to the next region.
1868     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1869 
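         // Try to claim the range [finger, end) by atomically moving the global
         // finger from finger to end; only the task whose CAS succeeds owns the
         // region (if any) that starts at finger.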
1870     // Is the gap between reading the finger and doing the CAS too long?
1871     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
1872     if (res == finger && curr_region != NULL) {
1873       // we succeeded
1874       HeapWord*   bottom        = curr_region->bottom();
1875       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1876 
1877       // notice that _finger == end cannot be guaranteed here since
1878       // someone else might have moved the finger even further
1879       assert(_finger >= end, "the finger should have moved forward");
1880 
1881       if (limit > bottom) {
1882         return curr_region;
1883       } else {
1884         assert(limit == bottom,
1885                "the region limit should be at bottom");
1886         // we return NULL and the caller should try calling
1887         // claim_region() again.
1888         return NULL;
1889       }
1890     } else {
1891       assert(_finger > finger, "the finger should have moved forward");
1892       // read it again
1893       finger = _finger;
1894     }
1895   }
1896 
1897   return NULL;
1898 }
1899 
1900 #ifndef PRODUCT
1901 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1902 private:
1903   G1CollectedHeap* _g1h;
1904   const char* _phase;
1905   int _info;
1906 
1907 public:
1908   VerifyNoCSetOops(const char* phase, int info = -1) :
1909     _g1h(G1CollectedHeap::heap()),
1910     _phase(phase),
1911     _info(info)
1912   { }
1913 
1914   void operator()(G1TaskQueueEntry task_entry) const {
1915     if (task_entry.is_array_slice()) {
1916       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1917       return;
1918     }
1919     guarantee(oopDesc::is_oop(task_entry.obj()),
1920               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1921               p2i(task_entry.obj()), _phase, _info);
1922     guarantee(!_g1h->is_in_cset(task_entry.obj()),
1923               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1924               p2i(task_entry.obj()), _phase, _info);
1925   }
1926 };
1927 
1928 void G1ConcurrentMark::verify_no_cset_oops() {
1929   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1930   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
1931     return;
1932   }
1933 
1934   // Verify entries on the global mark stack
1935   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1936 
1937   // Verify entries on the task queues
1938   for (uint i = 0; i < _max_worker_id; ++i) {
1939     G1CMTaskQueue* queue = _task_queues->queue(i);
1940     queue->iterate(VerifyNoCSetOops("Queue", i));
1941   }
1942 
1943   // Verify the global finger
1944   HeapWord* global_finger = finger();
1945   if (global_finger != NULL && global_finger < _heap_end) {
1946     // Since we always iterate over all regions, we might get a NULL HeapRegion
1947     // here.
1948     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1949     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1950               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1951               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1952   }
1953 
1954   // Verify the task fingers
1955   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
1956   for (uint i = 0; i < parallel_marking_threads(); ++i) {
1957     G1CMTask* task = _tasks[i];
1958     HeapWord* task_finger = task->finger();
1959     if (task_finger != NULL && task_finger < _heap_end) {
1960       // See above note on the global finger verification.
1961       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
1962       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
1963                 !task_hr->in_collection_set(),
1964                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1965                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
1966     }
1967   }
1968 }
1969 #endif // PRODUCT
1970 void G1ConcurrentMark::create_live_data() {
1971   _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap);
1972 }
1973 
1974 void G1ConcurrentMark::finalize_live_data() {
1975   _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap);
1976 }
1977 
1978 void G1ConcurrentMark::verify_live_data() {
1979   _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap);
1980 }
1981 
1982 void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
1983   _g1h->g1_rem_set()->clear_card_live_data(workers);
1984 }
1985 
1986 #ifdef ASSERT
1987 void G1ConcurrentMark::verify_live_data_clear() {
1988   _g1h->g1_rem_set()->verify_card_live_data_is_clear();
1989 }
1990 #endif
1991 
1992 void G1ConcurrentMark::print_stats() {
1993   if (!log_is_enabled(Debug, gc, stats)) {
1994     return;
1995   }
1996   log_debug(gc, stats)("---------------------------------------------------------------------");
1997   for (size_t i = 0; i < _active_tasks; ++i) {
1998     _tasks[i]->print_stats();
1999     log_debug(gc, stats)("---------------------------------------------------------------------");
2000   }
2001 }
2002 
2003 void G1ConcurrentMark::abort() {
2004   if (!cmThread()->during_cycle() || _has_aborted) {
2005     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2006     return;
2007   }
2008 
2009   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2010   // concurrent bitmap clearing.
2011   {
2012     GCTraceTime(Debug, gc)("Clear Next Bitmap");
2013     clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
2014   }
2015   // Note we cannot clear the previous marking bitmap here
2016   // since VerifyDuringGC verifies the objects marked during
2017   // a full GC against the previous bitmap.
2018 
2019   {
2020     GCTraceTime(Debug, gc)("Clear Live Data");
2021     clear_live_data(_g1h->workers());
2022   }
2023   DEBUG_ONLY({
2024     GCTraceTime(Debug, gc)("Verify Live Data Clear");
2025     verify_live_data_clear();
2026   })
2027   // Empty mark stack
2028   reset_marking_state();
2029   for (uint i = 0; i < _max_worker_id; ++i) {
2030     _tasks[i]->clear_region_fields();
2031   }
2032   _first_overflow_barrier_sync.abort();
2033   _second_overflow_barrier_sync.abort();
2034   _has_aborted = true;
2035 
2036   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2037   satb_mq_set.abandon_partial_marking();
2038   // This can be called either during or outside marking; we'll read
2039   // the expected_active value from the SATB queue set.
2040   satb_mq_set.set_active_all_threads(
2041                                  false, /* new active value */
2042                                  satb_mq_set.is_active() /* expected_active */);
2043 }
2044 
2045 static void print_ms_time_info(const char* prefix, const char* name,
2046                                NumberSeq& ns) {
2047   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2048                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2049   if (ns.num() > 0) {
2050     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2051                            prefix, ns.sd(), ns.maximum());
2052   }
2053 }
2054 
2055 void G1ConcurrentMark::print_summary_info() {
2056   Log(gc, marking) log;
2057   if (!log.is_trace()) {
2058     return;
2059   }
2060 
2061   log.trace(" Concurrent marking:");
2062   print_ms_time_info("  ", "init marks", _init_times);
2063   print_ms_time_info("  ", "remarks", _remark_times);
2064   {
2065     print_ms_time_info("     ", "final marks", _remark_mark_times);
2066     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2067 
2068   }
2069   print_ms_time_info("  ", "cleanups", _cleanup_times);
2070   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2071             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2072   if (G1ScrubRemSets) {
2073     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2074               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2075   }
2076   log.trace("  Total stop_world time = %8.2f s.",
2077             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2078   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2079             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2080 }
2081 
2082 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2083   _parallel_workers->print_worker_threads_on(st);
2084 }
2085 
2086 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2087   _parallel_workers->threads_do(tc);
2088 }
2089 
2090 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2091   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2092       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2093   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2094   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2095 }
2096 
2097 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2098   ReferenceProcessor* result = g1h->ref_processor_cm();
2099   assert(result != NULL, "CM reference processor should not be NULL");
2100   return result;
2101 }
2102 
2103 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2104                                G1ConcurrentMark* cm,
2105                                G1CMTask* task)
2106   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2107     _g1h(g1h), _cm(cm), _task(task)
2108 { }
2109 
2110 void G1CMTask::setup_for_region(HeapRegion* hr) {
2111   assert(hr != NULL,
2112         "claim_region() should have filtered out NULL regions");
2113   _curr_region  = hr;
2114   _finger       = hr->bottom();
2115   update_region_limit();
2116 }
2117 
2118 void G1CMTask::update_region_limit() {
2119   HeapRegion* hr            = _curr_region;
2120   HeapWord* bottom          = hr->bottom();
2121   HeapWord* limit           = hr->next_top_at_mark_start();
2122 
2123   if (limit == bottom) {
2124     // The region was collected underneath our feet.
2125     // We set the finger to bottom to ensure that the bitmap
2126     // iteration that will follow this will not do anything.
2127     // (this is not a condition that holds when we set the region up,
2128     // as the region is not supposed to be empty in the first place)
2129     _finger = bottom;
2130   } else if (limit >= _region_limit) {
2131     assert(limit >= _finger, "peace of mind");
2132   } else {
2133     assert(limit < _region_limit, "only way to get here");
2134     // This can happen under some pretty unusual circumstances.  An
2135     // evacuation pause empties the region underneath our feet (NTAMS
2136     // at bottom). We then do some allocation in the region (NTAMS
2137     // stays at bottom), followed by the region being used as a GC
2138     // alloc region (NTAMS will move to top() and the objects
2139     // originally below it will be grayed). All objects now marked in
2140     // the region are explicitly grayed, if below the global finger,
2141     // and in fact we do not need to scan anything else. So, we simply
2142     // set _finger to limit to ensure that the bitmap iteration
2143     // doesn't do anything.
2144     _finger = limit;
2145   }
2146 
2147   _region_limit = limit;
2148 }
2149 
2150 void G1CMTask::giveup_current_region() {
2151   assert(_curr_region != NULL, "invariant");
2152   clear_region_fields();
2153 }
2154 
2155 void G1CMTask::clear_region_fields() {
2156   // Values for these three fields that indicate that we're not
2157   // holding on to a region.
2158   _curr_region   = NULL;
2159   _finger        = NULL;
2160   _region_limit  = NULL;
2161 }
2162 
2163 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2164   if (cm_oop_closure == NULL) {
2165     assert(_cm_oop_closure != NULL, "invariant");
2166   } else {
2167     assert(_cm_oop_closure == NULL, "invariant");
2168   }
2169   _cm_oop_closure = cm_oop_closure;
2170 }
2171 
2172 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2173   guarantee(nextMarkBitMap != NULL, "invariant");
2174   _nextMarkBitMap                = nextMarkBitMap;
2175   clear_region_fields();
2176 
2177   _calls                         = 0;
2178   _elapsed_time_ms               = 0.0;
2179   _termination_time_ms           = 0.0;
2180   _termination_start_time_ms     = 0.0;
2181 }
2182 
2183 bool G1CMTask::should_exit_termination() {
2184   regular_clock_call();
2185   // This is called when we are in the termination protocol. We should
2186   // quit if, for some reason, this task wants to abort or the global
2187   // stack is not empty (this means that we can get work from it).
2188   return !_cm->mark_stack_empty() || has_aborted();
2189 }
2190 
2191 void G1CMTask::reached_limit() {
2192   assert(_words_scanned >= _words_scanned_limit ||
2193          _refs_reached >= _refs_reached_limit ,
2194          "shouldn't have been called otherwise");
2195   regular_clock_call();
2196 }
2197 
2198 void G1CMTask::regular_clock_call() {
2199   if (has_aborted()) return;
2200 
2201   // First, we need to recalculate the words scanned and refs reached
2202   // limits for the next clock call.
2203   recalculate_limits();
2204 
2205   // During the regular clock call we do the following
2206 
2207   // (1) If an overflow has been flagged, then we abort.
2208   if (_cm->has_overflown()) {
2209     set_has_aborted();
2210     return;
2211   }
2212 
2213   // If we are not concurrent (i.e. we're doing remark) we don't need
2214   // to check anything else. The other steps are only needed during
2215   // the concurrent marking phase.
2216   if (!concurrent()) return;
2217 
2218   // (2) If marking has been aborted for Full GC, then we also abort.
2219   if (_cm->has_aborted()) {
2220     set_has_aborted();
2221     return;
2222   }
2223 
2224   double curr_time_ms = os::elapsedVTime() * 1000.0;
2225 
2226   // (3) We check whether we should yield. If we have to, then we abort.
2227   if (SuspendibleThreadSet::should_yield()) {
2228     // We should yield. To do this we abort the task. The caller is
2229     // responsible for yielding.
2230     set_has_aborted();
2231     return;
2232   }
2233 
2234   // (4) We check whether we've reached our time quota. If we have,
2235   // then we abort.
2236   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2237   if (elapsed_time_ms > _time_target_ms) {
2238     set_has_aborted();
2239     _has_timed_out = true;
2240     return;
2241   }
2242 
2243   // (5) Finally, we check whether there are enough completed SATB
2244   // buffers available for processing. If there are, we abort.
2245   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2246   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2247     // we do need to process SATB buffers, so we'll abort and restart
2248     // the marking task to do so
2249     set_has_aborted();
2250     return;
2251   }
2252 }
2253 
2254 void G1CMTask::recalculate_limits() {
2255   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2256   _words_scanned_limit      = _real_words_scanned_limit;
2257 
2258   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2259   _refs_reached_limit       = _real_refs_reached_limit;
2260 }
2261 
2262 void G1CMTask::decrease_limits() {
2263   // This is called when we believe that we're going to do an infrequent
2264   // operation which will increase the per byte scanned cost (i.e. move
2265   // entries to/from the global stack). It basically tries to decrease the
2266   // scanning limit so that the clock is called earlier.
2267 
2268   _words_scanned_limit = _real_words_scanned_limit -
2269     3 * words_scanned_period / 4;
2270   _refs_reached_limit  = _real_refs_reached_limit -
2271     3 * refs_reached_period / 4;
2272 }
2273 
2274 void G1CMTask::move_entries_to_global_stack() {
2275   // Local array where we'll store the entries that will be popped
2276   // from the local queue.
2277   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2278 
2279   size_t n = 0;
2280   G1TaskQueueEntry task_entry;
2281   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2282     buffer[n] = task_entry;
2283     ++n;
2284   }
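       // If the chunk is not completely full, null-terminate it; the consumer
       // in get_entries_from_global_stack() stops at the first null entry.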
2285   if (n < G1CMMarkStack::EntriesPerChunk) {
2286     buffer[n] = G1TaskQueueEntry();
2287   }
2288 
2289   if (n > 0) {
2290     if (!_cm->mark_stack_push(buffer)) {
2291       set_has_aborted();
2292     }
2293   }
2294 
2295   // This operation was quite expensive, so decrease the limits.
2296   decrease_limits();
2297 }
2298 
2299 bool G1CMTask::get_entries_from_global_stack() {
2300   // Local array where we'll store the entries that will be popped
2301   // from the global stack.
2302   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2303 
2304   if (!_cm->mark_stack_pop(buffer)) {
2305     return false;
2306   }
2307 
2308   // We did actually pop at least one entry.
2309   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2310     G1TaskQueueEntry task_entry = buffer[i];
2311     if (task_entry.is_null()) {
2312       break;
2313     }
2314     assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2315     bool success = _task_queue->push(task_entry);
2316     // We only call this when the local queue is empty or under a
2317     // given target limit. So, we do not expect this push to fail.
2318     assert(success, "invariant");
2319   }
2320 
2321   // This operation was quite expensive, so decrease the limits
2322   decrease_limits();
2323   return true;
2324 }
2325 
2326 void G1CMTask::drain_local_queue(bool partially) {
2327   if (has_aborted()) {
2328     return;
2329   }
2330 
2331   // Decide what the target size is, depending on whether we're going to
2332   // drain it partially (so that other tasks can steal if they run out
2333   // of things to do) or totally (at the very end).
2334   size_t target_size;
2335   if (partially) {
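         // Leave up to a third of the queue's capacity (capped at
         // GCDrainStackTargetSize entries) behind so that other tasks can steal.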
2336     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2337   } else {
2338     target_size = 0;
2339   }
2340 
2341   if (_task_queue->size() > target_size) {
2342     G1TaskQueueEntry entry;
2343     bool ret = _task_queue->pop_local(entry);
2344     while (ret) {
2345       scan_task_entry(entry);
2346       if (_task_queue->size() <= target_size || has_aborted()) {
2347         ret = false;
2348       } else {
2349         ret = _task_queue->pop_local(entry);
2350       }
2351     }
2352   }
2353 }
2354 
2355 void G1CMTask::drain_global_stack(bool partially) {
2356   if (has_aborted()) return;
2357 
2358   // We have a policy to drain the local queue before we attempt to
2359   // drain the global stack.
2360   assert(partially || _task_queue->size() == 0, "invariant");
2361 
2362   // Decide what the target size is, depending on whether we're going to
2363   // drain it partially (so that other tasks can steal if they run out
2364   // of things to do) or totally (at the very end).
2365   // Notice that when draining the global mark stack partially, due to the raciness
2366   // of the mark stack size update, we might in fact drop below the target. But
2367   // this is not a problem.
2368   // In case of total draining, we simply process until the global mark stack is
2369   // totally empty, disregarding the size counter.
2370   if (partially) {
2371     size_t const target_size = _cm->partial_mark_stack_size_target();
2372     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2373       if (get_entries_from_global_stack()) {
2374         drain_local_queue(partially);
2375       }
2376     }
2377   } else {
2378     while (!has_aborted() && get_entries_from_global_stack()) {
2379       drain_local_queue(partially);
2380     }
2381   }
2382 }
2383 
2384 // The SATB queue code makes several assumptions about whether the par or
2385 // non-par versions of the methods are called. This is why some of the code is
2386 // replicated. We should really get rid of the single-threaded version
2387 // of the code to simplify things.
2388 void G1CMTask::drain_satb_buffers() {
2389   if (has_aborted()) return;
2390 
2391   // We set this so that the regular clock knows that we're in the
2392   // middle of draining buffers and doesn't set the abort flag when it
2393   // notices that SATB buffers are available for draining. It'd be
2394   // very counterproductive if it did that. :-)
2395   _draining_satb_buffers = true;
2396 
2397   G1CMSATBBufferClosure satb_cl(this, _g1h);
2398   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2399 
2400   // This keeps claiming and applying the closure to completed buffers
2401   // until we run out of buffers or we need to abort.
2402   while (!has_aborted() &&
2403          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2404     regular_clock_call();
2405   }
2406 
2407   _draining_satb_buffers = false;
2408 
2409   assert(has_aborted() ||
2410          concurrent() ||
2411          satb_mq_set.completed_buffers_num() == 0, "invariant");
2412 
2413   // Again, this was a potentially expensive operation, so decrease the
2414   // limits to get the regular clock call early.
2415   decrease_limits();
2416 }
2417 
2418 void G1CMTask::print_stats() {
2419   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2420                        _worker_id, _calls);
2421   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2422                        _elapsed_time_ms, _termination_time_ms);
2423   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2424                        _step_times_ms.num(), _step_times_ms.avg(),
2425                        _step_times_ms.sd());
2426   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
2427                        _step_times_ms.maximum(), _step_times_ms.sum());
2428 }
2429 
2430 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2431   return _task_queues->steal(worker_id, hash_seed, task_entry);
2432 }
2433 
2434 /*****************************************************************************
2435 
2436     The do_marking_step(time_target_ms, ...) method is the building
2437     block of the parallel marking framework. It can be called in parallel
2438     with other invocations of do_marking_step() on different tasks
2439     (but only one per task, obviously) and concurrently with the
2440     mutator threads, or during remark, hence it eliminates the need
2441     for two versions of the code. When called during remark, it will
2442     pick up from where the task left off during the concurrent marking
2443     phase. Interestingly, tasks are also claimable during evacuation
2444     pauses too, since do_marking_step() ensures that it aborts before
2445     it needs to yield.
2446 
2447     The data structures that it uses to do marking work are the
2448     following:
2449 
2450       (1) Marking Bitmap. If there are gray objects that appear only
2451       on the bitmap (this happens either when dealing with an overflow
2452       or when the initial marking phase has simply marked the roots
2453       and didn't push them on the stack), then tasks claim heap
2454       regions whose bitmap they then scan to find gray objects. A
2455       global finger indicates where the end of the last claimed region
2456       is. A local finger indicates how far into the region a task has
2457       scanned. The two fingers are used to determine how to gray an
2458       object (i.e. whether simply marking it is OK, as it will be
2459       visited by a task in the future, or whether it also needs to be
2460       pushed on a stack).
2461 
2462       (2) Local Queue. The local queue of the task which is accessed
2463       reasonably efficiently by the task. Other tasks can steal from
2464       it when they run out of work. Throughout the marking phase, a
2465       task attempts to keep its local queue short but not totally
2466       empty, so that entries are available for stealing by other
2467       tasks. Only when there is no more work will a task totally
2468       drain its local queue.
2469 
2470       (3) Global Mark Stack. This handles local queue overflow. During
2471       marking only sets of entries are moved between it and the local
2472       queues, as access to it requires a mutex and more fine-grained
2473       interaction with it, which might cause contention. If it
2474       overflows, then the marking phase should restart and iterate
2475       over the bitmap to identify gray objects. Throughout the marking
2476       phase, tasks attempt to keep the global mark stack at a small
2477       length but not totally empty, so that entries are available for
2478       popping by other tasks. Only when there is no more work will tasks
2479       totally drain the global mark stack.
2480 
2481       (4) SATB Buffer Queue. This is where completed SATB buffers are
2482       made available. Buffers are regularly removed from this queue
2483       and scanned for roots, so that the queue doesn't get too
2484       long. During remark, all completed buffers are processed, as
2485       well as the filled in parts of any uncompleted buffers.
2486 
2487     The do_marking_step() method tries to abort when the time target
2488     has been reached. There are a few other cases when the
2489     do_marking_step() method also aborts:
2490 
2491       (1) When the marking phase has been aborted (after a Full GC).
2492 
2493       (2) When a global overflow (on the global stack) has been
2494       triggered. Before the task aborts, it will actually sync up with
2495       the other tasks to ensure that all the marking data structures
2496       (local queues, stacks, fingers etc.)  are re-initialized so that
2497       when do_marking_step() completes, the marking phase can
2498       immediately restart.
2499 
2500       (3) When enough completed SATB buffers are available. The
2501       do_marking_step() method only tries to drain SATB buffers right
2502       at the beginning. So, if enough buffers are available, the
2503       marking step aborts and the SATB buffers are processed at
2504       the beginning of the next invocation.
2505 
2506       (4) To yield. When we have to yield, we abort and yield
2507       right at the end of do_marking_step(). This saves us from a lot
2508       of hassle as, by yielding, we might allow a Full GC. If this
2509       happens then objects will be compacted underneath our feet, the
2510       heap might shrink, etc. We save checking for this by just
2511       aborting and doing the yield right at the end.
2512 
2513     From the above it follows that the do_marking_step() method should
2514     be called in a loop (or, otherwise, regularly) until it completes.
2515 
2516     If a marking step completes without its has_aborted() flag being
2517     true, it means it has completed the current marking phase (and
2518     also all other marking tasks have done so and have all synced up).
2519 
2520     A method called regular_clock_call() is invoked "regularly" (in
2521     sub ms intervals) throughout marking. It is this clock method that
2522     checks all the abort conditions which were mentioned above and
2523     decides when the task should abort. A work-based scheme is used to
2524     trigger this clock method: when the number of object words the
2525     marking phase has scanned or the number of references the marking
2526       phase has visited reaches a given limit. Additional invocations of
2527       the clock method have been planted in a few other strategic places
2528     too. The initial reason for the clock method was to avoid calling
2529     vtime too regularly, as it is quite expensive. So, once it was in
2530     place, it was natural to piggy-back all the other conditions on it
2531     too and not constantly check them throughout the code.
2532 
2533     If do_termination is true then do_marking_step will enter its
2534     termination protocol.
2535 
2536     The value of is_serial must be true when do_marking_step is being
2537     called serially (i.e. by the VMThread) and do_marking_step should
2538     skip any synchronization in the termination and overflow code.
2539     Examples include the serial remark code and the serial reference
2540     processing closures.
2541 
2542     The value of is_serial must be false when do_marking_step is
2543     being called by any of the worker threads in a work gang.
2544     Examples include the concurrent marking code (CMMarkingTask),
2545     the MT remark code, and the MT reference processing closures.
2546 
2547  *****************************************************************************/
2548 
2549 void G1CMTask::do_marking_step(double time_target_ms,
2550                                bool do_termination,
2551                                bool is_serial) {
2552   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2553   assert(concurrent() == _cm->concurrent(), "they should be the same");
2554 
2555   G1Policy* g1_policy = _g1h->g1_policy();
2556   assert(_task_queues != NULL, "invariant");
2557   assert(_task_queue != NULL, "invariant");
2558   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
2559 
2560   assert(!_claimed,
2561          "only one thread should claim this task at any one time");
2562 
2563   // OK, this doesn't safeguard against all possible scenarios, as it is
2564   // possible for two threads to set the _claimed flag at the same
2565   // time. But it is only for debugging purposes anyway and it will
2566   // catch most problems.
2567   _claimed = true;
2568 
2569   _start_time_ms = os::elapsedVTime() * 1000.0;
2570 
2571   // If do_stealing is true then do_marking_step will attempt to
2572   // steal work from the other G1CMTasks. It only makes sense to
2573   // enable stealing when the termination protocol is enabled
2574   // and do_marking_step() is not being called serially.
2575   bool do_stealing = do_termination && !is_serial;
2576 
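       // Adjust the time target using a prediction based on the differences
       // between requested and actual durations of previous marking steps, so
       // that on average we finish close to the requested target.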
2577   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2578   _time_target_ms = time_target_ms - diff_prediction_ms;
2579 
2580   // set up the variables that are used in the work-based scheme to
2581   // call the regular clock method
2582   _words_scanned = 0;
2583   _refs_reached  = 0;
2584   recalculate_limits();
2585 
2586   // clear all flags
2587   clear_has_aborted();
2588   _has_timed_out = false;
2589   _draining_satb_buffers = false;
2590 
2591   ++_calls;
2592 
2593   // Set up the bitmap and oop closures. Anything that uses them is
2594   // eventually called from this method, so it is OK to allocate these
2595   // as locals here, on the stack.
2596   G1CMBitMapClosure bitmap_closure(this, _cm);
2597   G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
2598   set_cm_oop_closure(&cm_oop_closure);
2599 
2600   if (_cm->has_overflown()) {
2601     // This can happen if the mark stack overflows during a GC pause
2602     // and this task, after a yield point, restarts. We have to abort
2603     // as we need to get into the overflow protocol which happens
2604     // right at the end of this task.
2605     set_has_aborted();
2606   }
2607 
2608   // First drain any available SATB buffers. After this, we will not
2609   // look at SATB buffers before the next invocation of this method.
2610   // If enough completed SATB buffers are queued up, the regular clock
2611   // will abort this task so that it restarts.
2612   drain_satb_buffers();
2613   // ...then partially drain the local queue and the global stack
2614   drain_local_queue(true);
2615   drain_global_stack(true);
2616 
2617   do {
2618     if (!has_aborted() && _curr_region != NULL) {
2619       // This means that we're already holding on to a region.
2620       assert(_finger != NULL, "if region is not NULL, then the finger "
2621              "should not be NULL either");
2622 
2623       // We might have restarted this task after an evacuation pause
2624       // which might have evacuated the region we're holding on to
2625       // underneath our feet. Let's read its limit again to make sure
2626       // that we do not iterate over a region of the heap that
2627       // contains garbage (update_region_limit() will also move
2628       // _finger to the start of the region if it is found empty).
2629       update_region_limit();
2630       // We will start from _finger not from the start of the region,
2631       // as we might be restarting this task after aborting half-way
2632       // through scanning this region. In this case, _finger points to
2633       // the address where we last found a marked object. If this is a
2634       // fresh region, _finger points to start().
2635       MemRegion mr = MemRegion(_finger, _region_limit);
2636 
2637       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2638              "humongous regions should go around loop once only");
2639 
2640       // Some special cases:
2641       // If the memory region is empty, we can just give up the region.
2642       // If the current region is humongous then we only need to check
2643       // the bitmap for the bit associated with the start of the object,
2644       // scan the object if it's live, and give up the region.
2645       // Otherwise, let's iterate over the bitmap of the part of the region
2646       // that is left.
2647       // If the iteration is successful, give up the region.
2648       if (mr.is_empty()) {
2649         giveup_current_region();
2650         regular_clock_call();
2651       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2652         if (_nextMarkBitMap->is_marked(mr.start())) {
2653           // The object is marked - apply the closure
2654           bitmap_closure.do_addr(mr.start());
2655         }
2656         // Even if this task aborted while scanning the humongous object
2657         // we can (and should) give up the current region.
2658         giveup_current_region();
2659         regular_clock_call();
2660       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
2661         giveup_current_region();
2662         regular_clock_call();
2663       } else {
2664         assert(has_aborted(), "currently the only way to do so");
2665         // The only way to abort the bitmap iteration is to return
2666         // false from the do_addr() method. However, inside the
2667         // do_addr() method we move the _finger to point to the
2668         // object currently being looked at. So, if we bail out, we
2669         // have definitely set _finger to something non-null.
2670         assert(_finger != NULL, "invariant");
2671 
2672         // Region iteration was actually aborted. So now _finger
2673         // points to the address of the object we last scanned. If we
2674         // leave it there, when we restart this task, we will rescan
2675         // the object. It is easy to avoid this. We move the finger by
2676         // enough to point to the next possible object header.
2677         assert(_finger < _region_limit, "invariant");
2678         HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2679         // Check if bitmap iteration was aborted while scanning the last object
2680         if (new_finger >= _region_limit) {
2681           giveup_current_region();
2682         } else {
2683           move_finger_to(new_finger);
2684         }
2685       }
2686     }
2687     // At this point we have either completed iterating over the
2688     // region we were holding on to, or we have aborted.
2689 
2690     // We then partially drain the local queue and the global stack.
2691     // (Do we really need this?)
2692     drain_local_queue(true);
2693     drain_global_stack(true);
2694 
2695     // Read the note on the claim_region() method on why it might
2696     // return NULL with potentially more regions available for
2697     // claiming and why we have to check out_of_regions() to determine
2698     // whether we're done or not.
2699     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2700       // We are going to try to claim a new region. We should have
2701       // given up on the previous one.
2702       // Separated the asserts so that we know which one fires.
2703       assert(_curr_region  == NULL, "invariant");
2704       assert(_finger       == NULL, "invariant");
2705       assert(_region_limit == NULL, "invariant");
2706       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2707       if (claimed_region != NULL) {
2708         // Yes, we managed to claim one
2709         setup_for_region(claimed_region);
2710         assert(_curr_region == claimed_region, "invariant");
2711       }
2712       // It is important to call the regular clock here. It might take
2713       // a while to claim a region if, for example, we hit a large
2714       // block of empty regions. So we need to call the regular clock
2715       // method once round the loop to make sure it's called
2716       // frequently enough.
2717       regular_clock_call();
2718     }
2719 
2720     if (!has_aborted() && _curr_region == NULL) {
2721       assert(_cm->out_of_regions(),
2722              "at this point we should be out of regions");
2723     }
2724   } while (_curr_region != NULL && !has_aborted());
2725 
2726   if (!has_aborted()) {
2727     // We cannot check whether the global stack is empty, since other
2728     // tasks might be pushing objects to it concurrently.
2729     assert(_cm->out_of_regions(),
2730            "at this point we should be out of regions");
2731     // Try to reduce the number of available SATB buffers so that
2732     // remark has less work to do.
2733     drain_satb_buffers();
2734   }
2735 
2736   // Since we've done everything else, we can now totally drain the
2737   // local queue and global stack.
2738   drain_local_queue(false);
2739   drain_global_stack(false);
2740 
2741   // Attempt to steal work from other tasks' queues.
2742   if (do_stealing && !has_aborted()) {
2743     // We have not aborted. This means that we have finished all that
2744     // we could. Let's try to do some stealing...
2745 
2746     // We cannot check whether the global stack is empty, since other
2747     // tasks might be pushing objects to it concurrently.
2748     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2749            "only way to reach here");
2750     while (!has_aborted()) {
2751       G1TaskQueueEntry entry;
2752       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2753         scan_task_entry(entry);
2754 
2755         // And since we're towards the end, let's totally drain the
2756         // local queue and global stack.
2757         drain_local_queue(false);
2758         drain_global_stack(false);
2759       } else {
2760         break;
2761       }
2762     }
2763   }
2764 
2765   // We still haven't aborted. Now, let's try to get into the
2766   // termination protocol.
2767   if (do_termination && !has_aborted()) {
2768     // We cannot check whether the global stack is empty, since other
2769     // tasks might be concurrently pushing objects on it.
2770     // Separated the asserts so that we know which one fires.
2771     assert(_cm->out_of_regions(), "only way to reach here");
2772     assert(_task_queue->size() == 0, "only way to reach here");
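         // Record when we enter the termination protocol so that the time
         // spent waiting for termination can be accounted for separately
         // (see _termination_time_ms below).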
2773     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2774 
2775     // The G1CMTask class also extends the TerminatorTerminator class,
2776     // hence its should_exit_termination() method will also decide
2777     // whether to exit the termination protocol.
2778     bool finished = (is_serial ||
2779                      _cm->terminator()->offer_termination(this));
2780     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2781     _termination_time_ms +=
2782       termination_end_time_ms - _termination_start_time_ms;
2783 
2784     if (finished) {
2785       // We're all done.
2786 
2787       if (_worker_id == 0) {
2788         // let's allow task 0 to do this
2789         if (concurrent()) {
2790           assert(_cm->concurrent_marking_in_progress(), "invariant");
2791           // we need to set this to false before the next
2792           // safepoint. This way we ensure that the marking phase
2793           // doesn't observe any more heap expansions.
2794           _cm->clear_concurrent_marking_in_progress();
2795         }
2796       }
2797 
2798       // We can now guarantee that the global stack is empty, since
2799       // all other tasks have finished. We separated the guarantees so
2800       // that, if a condition is false, we can immediately find out
2801       // which one.
2802       guarantee(_cm->out_of_regions(), "only way to reach here");
2803       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2804       guarantee(_task_queue->size() == 0, "only way to reach here");
2805       guarantee(!_cm->has_overflown(), "only way to reach here");
2806     } else {
2807       // Apparently there's more work to do. Let's abort this task; it
2808       // will be restarted and we can hopefully find more things to do.
2809       set_has_aborted();
2810     }
2811   }
2812 
2813   // Mainly for debugging purposes to make sure that a pointer to the
2814   // closure which was stack-allocated in this frame doesn't
2815   // escape it by accident.
2816   set_cm_oop_closure(NULL);
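       // Compute how long this marking step took, in (virtual) elapsed ms.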
2817   double end_time_ms = os::elapsedVTime() * 1000.0;
2818   double elapsed_time_ms = end_time_ms - _start_time_ms;
2819   // Update the step history.
2820   _step_times_ms.add(elapsed_time_ms);
2821 
2822   if (has_aborted()) {
2823     // The task was aborted for some reason.
2824     if (_has_timed_out) {
2825       double diff_ms = elapsed_time_ms - _time_target_ms;
2826       // Keep statistics of how well we did with respect to hitting
2827       // our target only if we actually timed out (if we aborted for
2828       // other reasons, then the results might get skewed).
2829       _marking_step_diffs_ms.add(diff_ms);
2830     }
2831 
2832     if (_cm->has_overflown()) {
2833       // This is the interesting one. We aborted because a global
2834       // overflow was raised. This means we have to restart the
2835       // marking phase and start iterating over regions. However, in
2836       // order to do this we have to make sure that all tasks stop
2837       // what they are doing and re-initialize in a safe manner. We
2838       // will achieve this with the use of two barrier sync points.
2839 
2840       if (!is_serial) {
2841         // We only need to enter the sync barrier if being called
2842         // from a parallel context
2843         _cm->enter_first_sync_barrier(_worker_id);
2844 
2845         // When we exit this sync barrier we know that all tasks have
2846         // stopped doing marking work. So, it's now safe to
2847         // re-initialize our data structures. At the end of this method,
2848         // task 0 will clear the global data structures.
2849       }
2850 
2851       // We clear the local state of this task...
2852       clear_region_fields();
2853 
2854       if (!is_serial) {
2855         // ...and enter the second barrier.
2856         _cm->enter_second_sync_barrier(_worker_id);
2857       }
2858       // At this point, if we're in the concurrent phase of
2859       // marking, everything has been re-initialized and we're
2860       // ready to restart.
2861     }
2862   }
2863 
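       // This marking step is finished, so the task is no longer claimed.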
2864   _claimed = false;
2865 }
2866 
2867 G1CMTask::G1CMTask(uint worker_id,
2868                    G1ConcurrentMark* cm,
2869                    G1CMTaskQueue* task_queue,
2870                    G1CMTaskQueueSet* task_queues)
2871   : _g1h(G1CollectedHeap::heap()),
2872     _worker_id(worker_id), _cm(cm),
2873     _objArray_processor(this),
2874     _claimed(false),
2875     _nextMarkBitMap(NULL), _hash_seed(17),
2876     _task_queue(task_queue),
2877     _task_queues(task_queues),
2878     _cm_oop_closure(NULL) {
2879   guarantee(task_queue != NULL, "invariant");
2880   guarantee(task_queues != NULL, "invariant");
2881 
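       // Seed the step time diff statistics with a small value, presumably so
       // that the very first prediction does not work from an empty sample.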
2882   _marking_step_diffs_ms.add(0.5);
2883 }
2884 
2885 // These are formatting macros that are used below to ensure
2886 // consistent formatting. The *_H_* versions are used to format the
2887 // header for a particular value and they should be kept consistent
2888 // with the corresponding macro. Also note that most of the macros add
2889 // the necessary white space (as a prefix), which makes them a bit
2890 // easier to compose.
2891 
2892 // All the output lines are prefixed with this string to be able to
2893 // identify them easily in a large log file.
2894 #define G1PPRL_LINE_PREFIX            "###"
2895 
2896 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2897 #ifdef _LP64
2898 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2899 #else // _LP64
2900 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2901 #endif // _LP64
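     // (The header width matches the formatted address range: two PTR_FORMAT
     // values plus the '-' separator, i.e. 2 * 18 + 1 = 37 characters on LP64
     // and 2 * 10 + 1 = 21 on 32-bit.)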
2902 
2903 // For per-region info
2904 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2905 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2906 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
2907 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2908 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
2909 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
2910 
2911 // For summary info
2912 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2913 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
2914 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2915 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
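     // For example, G1PPRL_SUM_MB_PERC_FORMAT("used") expands (by string
     // literal concatenation) to "  used: %1.2f MB / %1.2f %%", which is how
     // the summary line in the destructor below is built up.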
2916 
2917 G1PrintRegionLivenessInfoClosure::
2918 G1PrintRegionLivenessInfoClosure(const char* phase_name)
2919   : _total_used_bytes(0), _total_capacity_bytes(0),
2920     _total_prev_live_bytes(0), _total_next_live_bytes(0),
2921     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
2922   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2923   MemRegion g1_reserved = g1h->g1_reserved();
2924   double now = os::elapsedTime();
2925 
2926   // Print the header of the output.
2927   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
2928   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
2929                           G1PPRL_SUM_ADDR_FORMAT("reserved")
2930                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
2931                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
2932                           HeapRegion::GrainBytes);
2933   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2934   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2935                           G1PPRL_TYPE_H_FORMAT
2936                           G1PPRL_ADDR_BASE_H_FORMAT
2937                           G1PPRL_BYTE_H_FORMAT
2938                           G1PPRL_BYTE_H_FORMAT
2939                           G1PPRL_BYTE_H_FORMAT
2940                           G1PPRL_DOUBLE_H_FORMAT
2941                           G1PPRL_BYTE_H_FORMAT
2942                           G1PPRL_BYTE_H_FORMAT,
2943                           "type", "address-range",
2944                           "used", "prev-live", "next-live", "gc-eff",
2945                           "remset", "code-roots");
2946   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2947                           G1PPRL_TYPE_H_FORMAT
2948                           G1PPRL_ADDR_BASE_H_FORMAT
2949                           G1PPRL_BYTE_H_FORMAT
2950                           G1PPRL_BYTE_H_FORMAT
2951                           G1PPRL_BYTE_H_FORMAT
2952                           G1PPRL_DOUBLE_H_FORMAT
2953                           G1PPRL_BYTE_H_FORMAT
2954                           G1PPRL_BYTE_H_FORMAT,
2955                           "", "",
2956                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
2957                           "(bytes)", "(bytes)");
2958 }
2959 
2960 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
2961   const char* type       = r->get_type_str();
2962   HeapWord* bottom       = r->bottom();
2963   HeapWord* end          = r->end();
2964   size_t capacity_bytes  = r->capacity();
2965   size_t used_bytes      = r->used();
2966   size_t prev_live_bytes = r->live_bytes();
2967   size_t next_live_bytes = r->next_live_bytes();
2968   double gc_eff          = r->gc_efficiency();
2969   size_t remset_bytes    = r->rem_set()->mem_size();
2970   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
2971 
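       // Accumulate the per-region values into the totals that are printed
       // by the destructor.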
2972   _total_used_bytes      += used_bytes;
2973   _total_capacity_bytes  += capacity_bytes;
2974   _total_prev_live_bytes += prev_live_bytes;
2975   _total_next_live_bytes += next_live_bytes;
2976   _total_remset_bytes    += remset_bytes;
2977   _total_strong_code_roots_bytes += strong_code_roots_bytes;
2978 
2979   // Print a line for this particular region.
2980   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2981                           G1PPRL_TYPE_FORMAT
2982                           G1PPRL_ADDR_BASE_FORMAT
2983                           G1PPRL_BYTE_FORMAT
2984                           G1PPRL_BYTE_FORMAT
2985                           G1PPRL_BYTE_FORMAT
2986                           G1PPRL_DOUBLE_FORMAT
2987                           G1PPRL_BYTE_FORMAT
2988                           G1PPRL_BYTE_FORMAT,
2989                           type, p2i(bottom), p2i(end),
2990                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
2991                           remset_bytes, strong_code_roots_bytes);
2992 
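       // Returning false tells the region iteration to continue with the
       // next region; this closure never terminates the iteration early.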
2993   return false;
2994 }
2995 
2996 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
2997   // Add the static and free-list memory usage of HeapRegionRemSet to the remembered set total.
2998   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
2999   // Print the footer of the output.
3000   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3001   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3002                          " SUMMARY"
3003                          G1PPRL_SUM_MB_FORMAT("capacity")
3004                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3005                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3006                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3007                          G1PPRL_SUM_MB_FORMAT("remset")
3008                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3009                          bytes_to_mb(_total_capacity_bytes),
3010                          bytes_to_mb(_total_used_bytes),
3011                          perc(_total_used_bytes, _total_capacity_bytes),
3012                          bytes_to_mb(_total_prev_live_bytes),
3013                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3014                          bytes_to_mb(_total_next_live_bytes),
3015                          perc(_total_next_live_bytes, _total_capacity_bytes),
3016                          bytes_to_mb(_total_remset_bytes),
3017                          bytes_to_mb(_total_strong_code_roots_bytes));
3018 }