1 /*
   2  * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/classLoaderDataGraph.hpp"
  27 #include "code/codeCache.hpp"
  28 #include "gc/g1/g1BarrierSet.hpp"
  29 #include "gc/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc/g1/g1CollectorState.hpp"
  31 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  32 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  33 #include "gc/g1/g1DirtyCardQueue.hpp"
  34 #include "gc/g1/g1HeapVerifier.hpp"
  35 #include "gc/g1/g1OopClosures.inline.hpp"
  36 #include "gc/g1/g1Policy.hpp"
  37 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/g1ThreadLocalData.hpp"
  40 #include "gc/g1/g1Trace.hpp"
  41 #include "gc/g1/heapRegion.inline.hpp"
  42 #include "gc/g1/heapRegionRemSet.hpp"
  43 #include "gc/g1/heapRegionSet.inline.hpp"
  44 #include "gc/shared/gcId.hpp"
  45 #include "gc/shared/gcTimer.hpp"
  46 #include "gc/shared/gcTraceTime.inline.hpp"
  47 #include "gc/shared/gcVMOperations.hpp"
  48 #include "gc/shared/genOopClosures.inline.hpp"
  49 #include "gc/shared/referencePolicy.hpp"
  50 #include "gc/shared/strongRootsScope.hpp"
  51 #include "gc/shared/suspendibleThreadSet.hpp"
  52 #include "gc/shared/taskqueue.inline.hpp"
  53 #include "gc/shared/weakProcessor.inline.hpp"
  54 #include "gc/shared/workerPolicy.hpp"
  55 #include "include/jvm.h"
  56 #include "logging/log.hpp"
  57 #include "memory/allocation.hpp"
  58 #include "memory/resourceArea.hpp"
  59 #include "memory/universe.hpp"
  60 #include "oops/access.inline.hpp"
  61 #include "oops/oop.inline.hpp"
  62 #include "runtime/atomic.hpp"
  63 #include "runtime/handles.inline.hpp"
  64 #include "runtime/mutexLocker.inline.hpp"
  65 #include "runtime/java.hpp"
  66 #include "runtime/prefetch.inline.hpp"
  67 #include "services/memTracker.hpp"
  68 #include "utilities/align.hpp"
  69 #include "utilities/growableArray.hpp"
  70 
  71 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  72   assert(addr < _cm->finger(), "invariant");
  73   assert(addr >= _task->finger(), "invariant");
  74 
  75   // We move that task's local finger along.
  76   _task->move_finger_to(addr);
  77 
  78   _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  79   // we only partially drain the local queue and global stack
  80   _task->drain_local_queue(true);
  81   _task->drain_global_stack(true);
  82 
  83   // if the has_aborted flag has been raised, we need to bail out of
  84   // the iteration
  85   return !_task->has_aborted();
  86 }
  87 
  88 G1CMMarkStack::G1CMMarkStack() :
  89   _max_chunk_capacity(0),
  90   _base(NULL),
  91   _chunk_capacity(0) {
  92   set_empty();
  93 }
  94 
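     // Replace the backing memory of the mark stack with a new mapping of
     // new_capacity chunks, releasing any previous mapping. Only valid while the
     // stack is empty; returns false if the new memory could not be reserved.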
  95 bool G1CMMarkStack::resize(size_t new_capacity) {
  96   assert(is_empty(), "Only resize when stack is empty.");
  97   assert(new_capacity <= _max_chunk_capacity,
  98          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
  99 
 100   TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);
 101 
 102   if (new_base == NULL) {
 103     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 104     return false;
 105   }
 106   // Release old mapping.
 107   if (_base != NULL) {
 108     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 109   }
 110 
 111   _base = new_base;
 112   _chunk_capacity = new_capacity;
 113   set_empty();
 114 
 115   return true;
 116 }
 117 
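     // Alignment for mark stack capacities, expressed in number of G1TaskQueueEntry
     // elements; derived from the OS allocation granularity and the chunk size.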
 118 size_t G1CMMarkStack::capacity_alignment() {
 119   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 120 }
 121 
 122 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 123   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 124 
 125   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 126 
 127   _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 128   size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 129 
 130   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 131             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 132             _max_chunk_capacity,
 133             initial_chunk_capacity);
 134 
 135   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 136                 initial_chunk_capacity, _max_chunk_capacity);
 137 
 138   return resize(initial_chunk_capacity);
 139 }
 140 
 141 void G1CMMarkStack::expand() {
 142   if (_chunk_capacity == _max_chunk_capacity) {
 143     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 144     return;
 145   }
 146   size_t old_capacity = _chunk_capacity;
 147   // Double capacity if possible
 148   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 149 
 150   if (resize(new_capacity)) {
 151     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 152                   old_capacity, new_capacity);
 153   } else {
 154     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 155                     old_capacity, new_capacity);
 156   }
 157 }
 158 
 159 G1CMMarkStack::~G1CMMarkStack() {
 160   if (_base != NULL) {
 161     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 162   }
 163 }
 164 
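     // Prepend elem to *list. Callers are expected to hold the lock that protects
     // the particular list (see the wrappers below).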
 165 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 166   elem->next = *list;
 167   *list = elem;
 168 }
 169 
 170 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 171   MutexLocker x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 172   add_chunk_to_list(&_chunk_list, elem);
 173   _chunks_in_chunk_list++;
 174 }
 175 
 176 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 177   MutexLocker x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 178   add_chunk_to_list(&_free_list, elem);
 179 }
 180 
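     // Unlink and return the head of *list, or NULL if the list is empty. As above,
     // callers are expected to hold the lock that protects the list.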
 181 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 182   TaskQueueEntryChunk* result = *list;
 183   if (result != NULL) {
 184     *list = (*list)->next;
 185   }
 186   return result;
 187 }
 188 
 189 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 190   MutexLocker x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 191   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 192   if (result != NULL) {
 193     _chunks_in_chunk_list--;
 194   }
 195   return result;
 196 }
 197 
 198 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 199   MutexLocker x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 200   return remove_chunk_from_list(&_free_list);
 201 }
 202 
 203 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
 204   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 205   // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 206   // wraparound of _hwm.
 207   if (_hwm >= _chunk_capacity) {
 208     return NULL;
 209   }
 210 
 211   size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
 212   if (cur_idx >= _chunk_capacity) {
 213     return NULL;
 214   }
 215 
 216   TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
 217   result->next = NULL;
 218   return result;
 219 }
 220 
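     // Push a full chunk's worth (EntriesPerChunk entries) of data from ptr_arr onto
     // the global mark stack, using a chunk from the free list or freshly allocated
     // backing memory. Returns false if neither is available, i.e. on overflow.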
 221 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 222   // Get a new chunk.
 223   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 224 
 225   if (new_chunk == NULL) {
 226     // Did not get a chunk from the free list. Allocate from backing memory.
 227     new_chunk = allocate_new_chunk();
 228 
 229     if (new_chunk == NULL) {
 230       return false;
 231     }
 232   }
 233 
 234   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 235 
 236   add_chunk_to_chunk_list(new_chunk);
 237 
 238   return true;
 239 }
 240 
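     // Pop one chunk's worth of entries from the global mark stack into ptr_arr and
     // return the chunk to the free list. Returns false if the stack is empty.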
 241 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 242   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 243 
 244   if (cur == NULL) {
 245     return false;
 246   }
 247 
 248   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 249 
 250   add_chunk_to_free_list(cur);
 251   return true;
 252 }
 253 
 254 void G1CMMarkStack::set_empty() {
 255   _chunks_in_chunk_list = 0;
 256   _hwm = 0;
 257   _chunk_list = NULL;
 258   _free_list = NULL;
 259 }
 260 
 261 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
 262     _root_regions(NULL),
 263     _max_regions(max_regions),
 264     _num_root_regions(0),
 265     _claimed_root_regions(0),
 266     _scan_in_progress(false),
 267     _should_abort(false) {
 268   _root_regions = new MemRegion[_max_regions];
 269   if (_root_regions == NULL) {
 270     vm_exit_during_initialization("Could not allocate root MemRegion set.");
 271   }
 272 }
 273 
 274 G1CMRootMemRegions::~G1CMRootMemRegions() {
 275   delete[] _root_regions;
 276 }
 277 
 278 void G1CMRootMemRegions::reset() {
 279   _num_root_regions = 0;
 280 }
 281 
 282 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
 283   assert_at_safepoint();
 284   size_t idx = Atomic::add((size_t)1, &_num_root_regions) - 1;
 285   assert(idx < _max_regions, "Trying to add more root MemRegions than there is space for: " SIZE_FORMAT, _max_regions);
 286   assert(start != NULL && end != NULL && start <= end, "Start (" PTR_FORMAT ") should be less than or equal to "
 287          "end (" PTR_FORMAT ")", p2i(start), p2i(end));
 288   _root_regions[idx].set_start(start);
 289   _root_regions[idx].set_end(end);
 290 }
 291 
 292 void G1CMRootMemRegions::prepare_for_scan() {
 293   assert(!scan_in_progress(), "pre-condition");
 294 
 295   _scan_in_progress = _num_root_regions > 0;
 296 
 297   _claimed_root_regions = 0;
 298   _should_abort = false;
 299 }
 300 
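     // Claim the next root MemRegion to scan, or return NULL if all regions have been
     // claimed or the scan should be aborted. The unsynchronized check of
     // _claimed_root_regions is revalidated after the atomic increment.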
 301 const MemRegion* G1CMRootMemRegions::claim_next() {
 302   if (_should_abort) {
 303     // If someone has set the should_abort flag, we return NULL to
 304     // force the caller to bail out of their loop.
 305     return NULL;
 306   }
 307 
 308   if (_claimed_root_regions >= _num_root_regions) {
 309     return NULL;
 310   }
 311 
 312   size_t claimed_index = Atomic::add((size_t)1, &_claimed_root_regions) - 1;
 313   if (claimed_index < _num_root_regions) {
 314     return &_root_regions[claimed_index];
 315   }
 316   return NULL;
 317 }
 318 
 319 uint G1CMRootMemRegions::num_root_regions() const {
 320   return (uint)_num_root_regions;
 321 }
 322 
 323 void G1CMRootMemRegions::notify_scan_done() {
 324   MutexLocker x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 325   _scan_in_progress = false;
 326   RootRegionScan_lock->notify_all();
 327 }
 328 
 329 void G1CMRootMemRegions::cancel_scan() {
 330   notify_scan_done();
 331 }
 332 
 333 void G1CMRootMemRegions::scan_finished() {
 334   assert(scan_in_progress(), "pre-condition");
 335 
 336   if (!_should_abort) {
 337     assert(_claimed_root_regions >= num_root_regions(),
 338            "we should have claimed all root regions, claimed " SIZE_FORMAT ", length = %u",
 339            _claimed_root_regions, num_root_regions());
 340   }
 341 
 342   notify_scan_done();
 343 }
 344 
 345 bool G1CMRootMemRegions::wait_until_scan_finished() {
 346   if (!scan_in_progress()) {
 347     return false;
 348   }
 349 
 350   {
 351     MonitorLocker ml(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 352     while (scan_in_progress()) {
 353       ml.wait();
 354     }
 355   }
 356   return true;
 357 }
 358 
 359 // Returns the maximum number of workers to be used in a concurrent
 360 // phase based on the number of GC workers being used in an STW
 361 // phase.
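     // For example, 8 STW GC workers scale to (8 + 2) / 4 = 2 concurrent workers;
     // the result is never less than 1.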
 362 static uint scale_concurrent_worker_threads(uint num_gc_workers) {
 363   return MAX2((num_gc_workers + 2) / 4, 1U);
 364 }
 365 
 366 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 367                                    G1RegionToSpaceMapper* prev_bitmap_storage,
 368                                    G1RegionToSpaceMapper* next_bitmap_storage) :
 369   // _cm_thread set inside the constructor
 370   _g1h(g1h),
 371   _completed_initialization(false),
 372 
 373   _mark_bitmap_1(),
 374   _mark_bitmap_2(),
 375   _prev_mark_bitmap(&_mark_bitmap_1),
 376   _next_mark_bitmap(&_mark_bitmap_2),
 377 
 378   _heap(_g1h->reserved_region()),
 379 
 380   _root_regions(_g1h->max_regions()),
 381 
 382   _global_mark_stack(),
 383 
 384   // _finger set in set_non_marking_state
 385 
 386   _worker_id_offset(G1DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
 387   _max_num_tasks(ParallelGCThreads),
 388   // _num_active_tasks set in set_non_marking_state()
 389   // _tasks set inside the constructor
 390 
 391   _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
 392   _terminator((int) _max_num_tasks, _task_queues),
 393 
 394   _first_overflow_barrier_sync(),
 395   _second_overflow_barrier_sync(),
 396 
 397   _has_overflown(false),
 398   _concurrent(false),
 399   _has_aborted(false),
 400   _restart_for_overflow(false),
 401   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 402   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 403 
 404   // _verbose_level set below
 405 
 406   _init_times(),
 407   _remark_times(),
 408   _remark_mark_times(),
 409   _remark_weak_ref_times(),
 410   _cleanup_times(),
 411   _total_cleanup_time(0.0),
 412 
 413   _accum_task_vtime(NULL),
 414 
 415   _concurrent_workers(NULL),
 416   _num_concurrent_workers(0),
 417   _max_concurrent_workers(0),
 418 
 419   _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
 420   _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
 421 {
 422   _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 423   _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);
 424 
 425   // Create & start ConcurrentMark thread.
 426   _cm_thread = new G1ConcurrentMarkThread(this);
 427   if (_cm_thread->osthread() == NULL) {
 428     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 429   }
 430 
 431   assert(CGC_lock != NULL, "CGC_lock must be initialized");
 432 
 433   if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
 434     // Calculate the number of concurrent worker threads by scaling
 435     // the number of parallel GC threads.
 436     uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
 437     FLAG_SET_ERGO(ConcGCThreads, marking_thread_num);
 438   }
 439 
 440   assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
 441   if (ConcGCThreads > ParallelGCThreads) {
 442     log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
 443                     ConcGCThreads, ParallelGCThreads);
 444     return;
 445   }
 446 
 447   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 448   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 449 
 450   _num_concurrent_workers = ConcGCThreads;
 451   _max_concurrent_workers = _num_concurrent_workers;
 452 
 453   _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
 454   _concurrent_workers->initialize_workers();
 455 
 456   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 457     size_t mark_stack_size =
 458       MIN2(MarkStackSizeMax,
 459           MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
 460     // Verify that the calculated value for MarkStackSize is in range.
 461     // It would be nice to use the private utility routine from Arguments.
 462     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 463       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 464                       "must be between 1 and " SIZE_FORMAT,
 465                       mark_stack_size, MarkStackSizeMax);
 466       return;
 467     }
 468     FLAG_SET_ERGO(MarkStackSize, mark_stack_size);
 469   } else {
 470     // Verify MarkStackSize is in range.
 471     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 472       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 473         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 474           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 475                           "must be between 1 and " SIZE_FORMAT,
 476                           MarkStackSize, MarkStackSizeMax);
 477           return;
 478         }
 479       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 480         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 481           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 482                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 483                           MarkStackSize, MarkStackSizeMax);
 484           return;
 485         }
 486       }
 487     }
 488   }
 489 
 490   if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
 491     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 492   }
 493 
 494   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 495   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);
 496 
 497   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 498   _num_active_tasks = _max_num_tasks;
 499 
 500   for (uint i = 0; i < _max_num_tasks; ++i) {
 501     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 502     task_queue->initialize();
 503     _task_queues->register_queue(i, task_queue);
 504 
 505     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());
 506 
 507     _accum_task_vtime[i] = 0.0;
 508   }
 509 
 510   reset_at_marking_complete();
 511   _completed_initialization = true;
 512 }
 513 
 514 void G1ConcurrentMark::reset() {
 515   _has_aborted = false;
 516 
 517   reset_marking_for_restart();
 518 
 519   // Reset all tasks, since different phases will use a different number of active
 520   // threads. So, it's easiest to have all of them ready.
 521   for (uint i = 0; i < _max_num_tasks; ++i) {
 522     _tasks[i]->reset(_next_mark_bitmap);
 523   }
 524 
 525   uint max_regions = _g1h->max_regions();
 526   for (uint i = 0; i < max_regions; i++) {
 527     _top_at_rebuild_starts[i] = NULL;
 528     _region_mark_stats[i].clear();
 529   }
 530 }
 531 
 532 void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
 533   for (uint j = 0; j < _max_num_tasks; ++j) {
 534     _tasks[j]->clear_mark_stats_cache(region_idx);
 535   }
 536   _top_at_rebuild_starts[region_idx] = NULL;
 537   _region_mark_stats[region_idx].clear();
 538 }
 539 
 540 void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
 541   uint const region_idx = r->hrm_index();
 542   if (r->is_humongous()) {
 543     assert(r->is_starts_humongous(), "Got a continues humongous region here");
 544     uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
 545     for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
 546       clear_statistics_in_region(j);
 547     }
 548   } else {
 549     clear_statistics_in_region(region_idx);
 550   }
 551 }
 552 
 553 static void clear_mark_if_set(G1CMBitMap* bitmap, HeapWord* addr) {
 554   if (bitmap->is_marked(addr)) {
 555     bitmap->clear(addr);
 556   }
 557 }
 558 
 559 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
 560   assert_at_safepoint_on_vm_thread();
 561 
 562   // Need to clear all mark bits of the humongous object.
 563   clear_mark_if_set(_prev_mark_bitmap, r->bottom());
 564   clear_mark_if_set(_next_mark_bitmap, r->bottom());
 565 
 566   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
 567     return;
 568   }
 569 
 570   // Clear any statistics about the region gathered so far.
 571   clear_statistics(r);
 572 }
 573 
 574 void G1ConcurrentMark::reset_marking_for_restart() {
 575   _global_mark_stack.set_empty();
 576 
 577   // Expand the marking stack, if we have to and if we can.
 578   if (has_overflown()) {
 579     _global_mark_stack.expand();
 580 
 581     uint max_regions = _g1h->max_regions();
 582     for (uint i = 0; i < max_regions; i++) {
 583       _region_mark_stats[i].clear_during_overflow();
 584     }
 585   }
 586 
 587   clear_has_overflown();
 588   _finger = _heap.start();
 589 
 590   for (uint i = 0; i < _max_num_tasks; ++i) {
 591     G1CMTaskQueue* queue = _task_queues->queue(i);
 592     queue->set_empty();
 593   }
 594 }
 595 
 596 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 597   assert(active_tasks <= _max_num_tasks, "we should not have more");
 598 
 599   _num_active_tasks = active_tasks;
 600   // Need to update the three data structures below according to the
 601   // number of active threads for this phase.
 602   _terminator.terminator()->reset_for_reuse((int) active_tasks);
 603   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 604   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 605 }
 606 
 607 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 608   set_concurrency(active_tasks);
 609 
 610   _concurrent = concurrent;
 611 
 612   if (!concurrent) {
 613     // At this point we should be in an STW phase, and have completed marking.
 614     assert_at_safepoint_on_vm_thread();
 615     assert(out_of_regions(),
 616            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 617            p2i(_finger), p2i(_heap.end()));
 618   }
 619 }
 620 
 621 void G1ConcurrentMark::reset_at_marking_complete() {
 622   // We set the global marking state to some default values when we're
 623   // not doing marking.
 624   reset_marking_for_restart();
 625   _num_active_tasks = 0;
 626 }
 627 
 628 G1ConcurrentMark::~G1ConcurrentMark() {
 629   FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
 630   FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
 631   // The G1ConcurrentMark instance is never freed.
 632   ShouldNotReachHere();
 633 }
 634 
 635 class G1ClearBitMapTask : public AbstractGangTask {
 636 public:
 637   static size_t chunk_size() { return M; }
 638 
 639 private:
 640   // Heap region closure used for clearing the given mark bitmap.
 641   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 642   private:
 643     G1CMBitMap* _bitmap;
 644     G1ConcurrentMark* _cm;
 645   public:
 646     G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
 647     }
 648 
 649     virtual bool do_heap_region(HeapRegion* r) {
 650       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 651 
 652       HeapWord* cur = r->bottom();
 653       HeapWord* const end = r->end();
 654 
 655       while (cur < end) {
 656         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 657         _bitmap->clear_range(mr);
 658 
 659         cur += chunk_size_in_words;
 660 
 661         // Abort the iteration if, after yielding, we find that marking has been aborted.
 662         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 663           return true;
 664         }
 665         // Repeat the asserts from before the start of the closure. We will do them
 666         // as asserts here to minimize their overhead on the product. However, we
 667         // will have them as guarantees at the beginning / end of the bitmap
 668         // clearing to get some checking in the product.
 669         assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
 670         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 671       }
 672       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 673 
 674       return false;
 675     }
 676   };
 677 
 678   G1ClearBitmapHRClosure _cl;
 679   HeapRegionClaimer _hr_claimer;
 680   bool _suspendible; // If the task is suspendible, workers must join the STS.
 681 
 682 public:
 683   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 684     AbstractGangTask("G1 Clear Bitmap"),
 685     _cl(bitmap, suspendible ? cm : NULL),
 686     _hr_claimer(n_workers),
 687     _suspendible(suspendible)
 688   { }
 689 
 690   void work(uint worker_id) {
 691     SuspendibleThreadSetJoiner sts_join(_suspendible);
 692     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 693   }
 694 
 695   bool is_complete() {
 696     return _cl.is_complete();
 697   }
 698 };
 699 
 700 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 701   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 702 
 703   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 704   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 705 
 706   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 707 
 708   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 709 
 710   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 711   workers->run_task(&cl, num_workers);
 712   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 713 }
 714 
 715 void G1ConcurrentMark::cleanup_for_next_mark() {
 716   // Make sure that the concurrent mark thread still appears to be in
 717   // the current cycle.
 718   guarantee(cm_thread()->during_cycle(), "invariant");
 719 
 720   // We are finishing up the current cycle by clearing the next
 721   // marking bitmap and getting it ready for the next cycle. During
 722   // this time no other cycle can start. So, let's make sure that this
 723   // is the case.
 724   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 725 
 726   clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);
 727 
 728   // Repeat the asserts from above.
 729   guarantee(cm_thread()->during_cycle(), "invariant");
 730   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 731 }
 732 
 733 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 734   assert_at_safepoint_on_vm_thread();
 735   clear_bitmap(_prev_mark_bitmap, workers, false);
 736 }
 737 
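     // Closure used at the start of marking to call note_start_of_marking() on each
     // region, recording the region's current top as the next top-at-mark-start.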
 738 class NoteStartOfMarkHRClosure : public HeapRegionClosure {
 739 public:
 740   bool do_heap_region(HeapRegion* r) {
 741     r->note_start_of_marking();
 742     return false;
 743   }
 744 };
 745 
 746 void G1ConcurrentMark::pre_initial_mark() {
 747   assert_at_safepoint_on_vm_thread();
 748 
 749   // Reset marking state.
 750   reset();
 751 
 752   // For each region note start of marking.
 753   NoteStartOfMarkHRClosure startcl;
 754   _g1h->heap_region_iterate(&startcl);
 755 
 756   _root_regions.reset();
 757 }
 758 
 759 
 760 void G1ConcurrentMark::post_initial_mark() {
 761   // Start Concurrent Marking weak-reference discovery.
 762   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 763   // enable ("weak") refs discovery
 764   rp->enable_discovery();
 765   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 766 
 767   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
 768   // This is the start of the marking cycle; we expect all
 769   // threads to have SATB queues with active set to false.
 770   satb_mq_set.set_active_all_threads(true, /* new active value */
 771                                      false /* expected_active */);
 772 
 773   _root_regions.prepare_for_scan();
 774 
 775   // update_g1_committed() will be called at the end of an evac pause
 776   // when marking is on. So, it's also called at the end of the
 777   // initial-mark pause to update the heap end, if the heap expands
 778   // during it. No need to call it here.
 779 }
 780 
 781 /*
 782  * Notice that in the next two methods, we actually leave the STS
 783  * during the barrier sync and join it immediately afterwards. If we
 784  * do not do this, the following deadlock can occur: one thread could
 785  * be in the barrier sync code, waiting for the other thread to also
 786  * sync up, whereas another one could be trying to yield, while also
 787  * waiting for the other threads to sync up.
 788  *
 789  * Note, however, that this code is also used during remark and in
 790  * this case we should not attempt to leave / enter the STS, otherwise
 791  * we'll either hit an assert (debug / fastdebug) or deadlock
 792  * (product). So we should only leave / enter the STS if we are
 793  * operating concurrently.
 794  *
 795  * Because the thread that does the sync barrier has left the STS, it
 796  * is possible for it to be suspended while a Full GC or an evacuation
 797  * pause occurs. This is actually safe, since entering the sync
 798  * barrier is one of the last things do_marking_step() does, and it
 799  * doesn't manipulate any data structures afterwards.
 800  */
 801 
 802 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 803   bool barrier_aborted;
 804   {
 805     SuspendibleThreadSetLeaver sts_leave(concurrent());
 806     barrier_aborted = !_first_overflow_barrier_sync.enter();
 807   }
 808 
 809   // at this point everyone should have synced up and not be doing any
 810   // more work
 811 
 812   if (barrier_aborted) {
 813     // If the barrier aborted we ignore the overflow condition and
 814     // just abort the whole marking phase as quickly as possible.
 815     return;
 816   }
 817 }
 818 
 819 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 820   SuspendibleThreadSetLeaver sts_leave(concurrent());
 821   _second_overflow_barrier_sync.enter();
 822 
 823   // at this point everything should be re-initialized and ready to go
 824 }
 825 
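     // Gang task running the concurrent marking phase proper: each worker joins the
     // suspendible thread set and repeatedly invokes do_marking_step() on its
     // G1CMTask until marking completes or has been aborted.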
 826 class G1CMConcurrentMarkingTask : public AbstractGangTask {
 827   G1ConcurrentMark*     _cm;
 828 
 829 public:
 830   void work(uint worker_id) {
 831     assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
 832     ResourceMark rm;
 833 
 834     double start_vtime = os::elapsedVTime();
 835 
 836     {
 837       SuspendibleThreadSetJoiner sts_join;
 838 
 839       assert(worker_id < _cm->active_tasks(), "invariant");
 840 
 841       G1CMTask* task = _cm->task(worker_id);
 842       task->record_start_time();
 843       if (!_cm->has_aborted()) {
 844         do {
 845           task->do_marking_step(G1ConcMarkStepDurationMillis,
 846                                 true  /* do_termination */,
 847                                 false /* is_serial*/);
 848 
 849           _cm->do_yield_check();
 850         } while (!_cm->has_aborted() && task->has_aborted());
 851       }
 852       task->record_end_time();
 853       guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
 854     }
 855 
 856     double end_vtime = os::elapsedVTime();
 857     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 858   }
 859 
 860   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
 861       AbstractGangTask("Concurrent Mark"), _cm(cm) { }
 862 
 863   ~G1CMConcurrentMarkingTask() { }
 864 };
 865 
 866 uint G1ConcurrentMark::calc_active_marking_workers() {
 867   uint result = 0;
 868   if (!UseDynamicNumberOfGCThreads ||
 869       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 870        !ForceDynamicNumberOfGCThreads)) {
 871     result = _max_concurrent_workers;
 872   } else {
 873     result =
 874       WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
 875                                                 1, /* Minimum workers */
 876                                                 _num_concurrent_workers,
 877                                                 Threads::number_of_non_daemon_threads());
 878     // Don't scale the result down by scale_concurrent_workers() because
 879     // that scaling has already gone into "_max_concurrent_workers".
 880   }
 881   assert(result > 0 && result <= _max_concurrent_workers,
 882          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
 883          _max_concurrent_workers, result);
 884   return result;
 885 }
 886 
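     // Scan a single root MemRegion: iterate over the objects in it and mark the
     // objects they reference in the next mark bitmap.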
 887 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
 888 #ifdef ASSERT
 889   HeapWord* last = region->last();
 890   HeapRegion* hr = _g1h->heap_region_containing(last);
 891   assert(hr->is_old() || hr->next_top_at_mark_start() == hr->bottom(),
 892          "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
 893   assert(hr->next_top_at_mark_start() == region->start(),
 894          "MemRegion start should be equal to nTAMS");
 895 #endif
 896 
 897   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 898 
 899   const uintx interval = PrefetchScanIntervalInBytes;
 900   HeapWord* curr = region->start();
 901   const HeapWord* end = region->end();
 902   while (curr < end) {
 903     Prefetch::read(curr, interval);
 904     oop obj = oop(curr);
 905     int size = obj->oop_iterate_size(&cl);
 906     assert(size == obj->size(), "sanity");
 907     curr += size;
 908   }
 909 }
 910 
 911 class G1CMRootRegionScanTask : public AbstractGangTask {
 912   G1ConcurrentMark* _cm;
 913 public:
 914   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 915     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
 916 
 917   void work(uint worker_id) {
 918     assert(Thread::current()->is_ConcurrentGC_thread(),
 919            "this should only be done by a conc GC thread");
 920 
 921     G1CMRootMemRegions* root_regions = _cm->root_regions();
 922     const MemRegion* region = root_regions->claim_next();
 923     while (region != NULL) {
 924       _cm->scan_root_region(region, worker_id);
 925       region = root_regions->claim_next();
 926     }
 927   }
 928 };
 929 
 930 void G1ConcurrentMark::scan_root_regions() {
 931   // scan_in_progress() will have been set to true only if there was
 932   // at least one root region to scan. So, if it's false, we
 933   // should not attempt to do any further work.
 934   if (root_regions()->scan_in_progress()) {
 935     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 936 
 937     _num_concurrent_workers = MIN2(calc_active_marking_workers(),
 938                                    // We distribute work on a per-region basis, so starting
 939                                    // more threads than that is useless.
 940                                    root_regions()->num_root_regions());
 941     assert(_num_concurrent_workers <= _max_concurrent_workers,
 942            "Maximum number of marking threads exceeded");
 943 
 944     G1CMRootRegionScanTask task(this);
 945     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
 946                         task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
 947     _concurrent_workers->run_task(&task, _num_concurrent_workers);
 948 
 949     // It's possible that has_aborted() is true here without actually
 950     // aborting the root region scan earlier. This is OK as it's
 951     // mainly used for sanity checking.
 952     root_regions()->scan_finished();
 953   }
 954 }
 955 
 956 void G1ConcurrentMark::concurrent_cycle_start() {
 957   _gc_timer_cm->register_gc_start();
 958 
 959   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
 960 
 961   _g1h->trace_heap_before_gc(_gc_tracer_cm);
 962 }
 963 
 964 void G1ConcurrentMark::concurrent_cycle_end() {
 965   _g1h->collector_state()->set_clearing_next_bitmap(false);
 966 
 967   _g1h->trace_heap_after_gc(_gc_tracer_cm);
 968 
 969   if (has_aborted()) {
 970     log_info(gc, marking)("Concurrent Mark Abort");
 971     _gc_tracer_cm->report_concurrent_mode_failure();
 972   }
 973 
 974   _gc_timer_cm->register_gc_end();
 975 
 976   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
 977 }
 978 
 979 void G1ConcurrentMark::mark_from_roots() {
 980   _restart_for_overflow = false;
 981 
 982   _num_concurrent_workers = calc_active_marking_workers();
 983 
 984   uint active_workers = MAX2(1U, _num_concurrent_workers);
 985 
 986   // Setting the number of active workers is not guaranteed to succeed, since
 987   // fewer worker threads may currently exist and more may not be
 988   // available.
 989   active_workers = _concurrent_workers->update_active_workers(active_workers);
 990   log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());
 991 
 992   // Parallel task terminator is set in "set_concurrency_and_phase()"
 993   set_concurrency_and_phase(active_workers, true /* concurrent */);
 994 
 995   G1CMConcurrentMarkingTask marking_task(this);
 996   _concurrent_workers->run_task(&marking_task);
 997   print_stats();
 998 }
 999 
1000 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
1001   G1HeapVerifier* verifier = _g1h->verifier();
1002 
1003   verifier->verify_region_sets_optional();
1004 
1005   if (VerifyDuringGC) {
1006     GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1007 
1008     size_t const BufLen = 512;
1009     char buffer[BufLen];
1010 
1011     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1012     verifier->verify(type, vo, buffer);
1013   }
1014 
1015   verifier->check_bitmaps(caller);
1016 }
1017 
1018 class G1UpdateRemSetTrackingBeforeRebuildTask : public AbstractGangTask {
1019   G1CollectedHeap* _g1h;
1020   G1ConcurrentMark* _cm;
1021   HeapRegionClaimer _hrclaimer;
1022   uint volatile _total_selected_for_rebuild;
1023 
1024   G1PrintRegionLivenessInfoClosure _cl;
1025 
1026   class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
1027     G1CollectedHeap* _g1h;
1028     G1ConcurrentMark* _cm;
1029 
1030     G1PrintRegionLivenessInfoClosure* _cl;
1031 
1032     uint _num_regions_selected_for_rebuild;  // The number of regions actually selected for rebuild.
1033 
1034     void update_remset_before_rebuild(HeapRegion* hr) {
1035       G1RemSetTrackingPolicy* tracking_policy = _g1h->policy()->remset_tracker();
1036 
1037       bool selected_for_rebuild;
1038       if (hr->is_humongous()) {
1039         bool const is_live = _cm->liveness(hr->humongous_start_region()->hrm_index()) > 0;
1040         selected_for_rebuild = tracking_policy->update_humongous_before_rebuild(hr, is_live);
1041       } else {
1042         size_t const live_bytes = _cm->liveness(hr->hrm_index());
1043         selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
1044       }
1045       if (selected_for_rebuild) {
1046         _num_regions_selected_for_rebuild++;
1047       }
1048       _cm->update_top_at_rebuild_start(hr);
1049     }
1050 
1051     // Distribute the given words across the humongous object starting with hr and
1052     // note end of marking.
1053     void distribute_marked_bytes(HeapRegion* hr, size_t marked_words) {
1054       uint const region_idx = hr->hrm_index();
1055       size_t const obj_size_in_words = (size_t)oop(hr->bottom())->size();
1056       uint const num_regions_in_humongous = (uint)G1CollectedHeap::humongous_obj_size_in_regions(obj_size_in_words);
1057 
1058       // "Distributing" zero words means that we only note end of marking for these
1059       // regions.
1060       assert(marked_words == 0 || obj_size_in_words == marked_words,
1061              "Marked words should either be 0 or the same as humongous object (" SIZE_FORMAT ") but is " SIZE_FORMAT,
1062              obj_size_in_words, marked_words);
1063 
1064       for (uint i = region_idx; i < (region_idx + num_regions_in_humongous); i++) {
1065         HeapRegion* const r = _g1h->region_at(i);
1066         size_t const words_to_add = MIN2(HeapRegion::GrainWords, marked_words);
1067 
1068         log_trace(gc, marking)("Adding " SIZE_FORMAT " words to humongous region %u (%s)",
1069                                words_to_add, i, r->get_type_str());
1070         add_marked_bytes_and_note_end(r, words_to_add * HeapWordSize);
1071         marked_words -= words_to_add;
1072       }
1073       assert(marked_words == 0,
1074              SIZE_FORMAT " words left after distributing space across %u regions",
1075              marked_words, num_regions_in_humongous);
1076     }
1077 
1078     void update_marked_bytes(HeapRegion* hr) {
1079       uint const region_idx = hr->hrm_index();
1080       size_t const marked_words = _cm->liveness(region_idx);
1081       // The marking attributes the object's size completely to the starts humongous
1082       // region. We need to distribute this value across the entire set of regions a
1083       // humongous object spans.
1084       if (hr->is_humongous()) {
1085         assert(hr->is_starts_humongous() || marked_words == 0,
1086                "Should not have marked words " SIZE_FORMAT " in non-starts humongous region %u (%s)",
1087                marked_words, region_idx, hr->get_type_str());
1088         if (hr->is_starts_humongous()) {
1089           distribute_marked_bytes(hr, marked_words);
1090         }
1091       } else {
1092         log_trace(gc, marking)("Adding " SIZE_FORMAT " words to region %u (%s)", marked_words, region_idx, hr->get_type_str());
1093         add_marked_bytes_and_note_end(hr, marked_words * HeapWordSize);
1094       }
1095     }
1096 
1097     void add_marked_bytes_and_note_end(HeapRegion* hr, size_t marked_bytes) {
1098       hr->add_to_marked_bytes(marked_bytes);
1099       _cl->do_heap_region(hr);
1100       hr->note_end_of_marking();
1101     }
1102 
1103   public:
1104     G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm, G1PrintRegionLivenessInfoClosure* cl) :
1105       _g1h(g1h), _cm(cm), _cl(cl), _num_regions_selected_for_rebuild(0) { }
1106 
1107     virtual bool do_heap_region(HeapRegion* r) {
1108       update_remset_before_rebuild(r);
1109       update_marked_bytes(r);
1110 
1111       return false;
1112     }
1113 
1114     uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
1115   };
1116 
1117 public:
1118   G1UpdateRemSetTrackingBeforeRebuildTask(G1CollectedHeap* g1h, G1ConcurrentMark* cm, uint num_workers) :
1119     AbstractGangTask("G1 Update RemSet Tracking Before Rebuild"),
1120     _g1h(g1h), _cm(cm), _hrclaimer(num_workers), _total_selected_for_rebuild(0), _cl("Post-Marking") { }
1121 
1122   virtual void work(uint worker_id) {
1123     G1UpdateRemSetTrackingBeforeRebuild update_cl(_g1h, _cm, &_cl);
1124     _g1h->heap_region_par_iterate_from_worker_offset(&update_cl, &_hrclaimer, worker_id);
1125     Atomic::add(update_cl.num_selected_for_rebuild(), &_total_selected_for_rebuild);
1126   }
1127 
1128   uint total_selected_for_rebuild() const { return _total_selected_for_rebuild; }
1129 
1130   // Number of regions for which roughly one thread should be spawned for this work.
1131   static const uint RegionsPerThread = 384;
1132 };
1133 
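     // Heap region closure that updates each region's remembered set tracking state
     // once the remembered set rebuild has finished.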
1134 class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
1135   G1CollectedHeap* _g1h;
1136 public:
1137   G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1138 
1139   virtual bool do_heap_region(HeapRegion* r) {
1140     _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1141     return false;
1142   }
1143 };
1144 
1145 void G1ConcurrentMark::remark() {
1146   assert_at_safepoint_on_vm_thread();
1147 
1148   // If a full collection has happened, we should not continue. However we might
1149   // have ended up here because the Remark VM operation had already been scheduled.
1150   if (has_aborted()) {
1151     return;
1152   }
1153 
1154   G1Policy* policy = _g1h->policy();
1155   policy->record_concurrent_mark_remark_start();
1156 
1157   double start = os::elapsedTime();
1158 
1159   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");
1160 
1161   {
1162     GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1163     finalize_marking();
1164   }
1165 
1166   double mark_work_end = os::elapsedTime();
1167 
1168   bool const mark_finished = !has_overflown();
1169   if (mark_finished) {
1170     weak_refs_work(false /* clear_all_soft_refs */);
1171 
1172     SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1173     // We're done with marking.
1174     // This is the end of the marking cycle; we expect all
1175     // threads to have SATB queues with active set to true.
1176     satb_mq_set.set_active_all_threads(false, /* new active value */
1177                                        true /* expected_active */);
1178 
1179     {
1180       GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1181       flush_all_task_caches();
1182     }
1183 
1184     // Install newly created mark bitmap as "prev".
1185     swap_mark_bitmaps();
1186     {
1187       GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking Before Rebuild", _gc_timer_cm);
1188 
1189       uint const workers_by_capacity = (_g1h->num_regions() + G1UpdateRemSetTrackingBeforeRebuildTask::RegionsPerThread - 1) /
1190                                        G1UpdateRemSetTrackingBeforeRebuildTask::RegionsPerThread;
1191       uint const num_workers = MIN2(_g1h->workers()->active_workers(), workers_by_capacity);
1192 
1193       G1UpdateRemSetTrackingBeforeRebuildTask cl(_g1h, this, num_workers);
1194       log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_regions());
1195       _g1h->workers()->run_task(&cl, num_workers);
1196 
1197       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1198                                       _g1h->num_regions(), cl.total_selected_for_rebuild());
1199     }
1200     {
1201       GCTraceTime(Debug, gc, phases) debug("Reclaim Empty Regions", _gc_timer_cm);
1202       reclaim_empty_regions();
1203     }
1204 
1205     // Clean out dead classes
1206     if (ClassUnloadingWithConcurrentMark) {
1207       GCTraceTime(Debug, gc, phases) debug("Purge Metaspace", _gc_timer_cm);
1208       ClassLoaderDataGraph::purge();
1209     }
1210 
1211     _g1h->resize_heap_if_necessary();
1212 
1213     compute_new_sizes();
1214 
1215     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark after");
1216 
1217     assert(!restart_for_overflow(), "sanity");
1218     // Completely reset the marking state since marking completed
1219     reset_at_marking_complete();
1220   } else {
1221     // We overflowed.  Restart concurrent marking.
1222     _restart_for_overflow = true;
1223 
1224     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");
1225 
1226     // Clear the marking state because we will be restarting
1227     // marking due to overflowing the global mark stack.
1228     reset_marking_for_restart();
1229   }
1230 
1231   {
1232     GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1233     report_object_count(mark_finished);
1234   }
1235 
1236   // Statistics
1237   double now = os::elapsedTime();
1238   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1239   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1240   _remark_times.add((now - start) * 1000.0);
1241 
1242   policy->record_concurrent_mark_remark_end();
1243 }
1244 
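     // Gang task used during the Remark pause to reclaim regions that marking found
     // completely empty, collecting them in a free region list.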
1245 class G1ReclaimEmptyRegionsTask : public AbstractGangTask {
1246   // Per-region work during the Cleanup pause.
1247   class G1ReclaimEmptyRegionsClosure : public HeapRegionClosure {
1248     G1CollectedHeap* _g1h;
1249     size_t _freed_bytes;
1250     FreeRegionList* _local_cleanup_list;
1251     uint _old_regions_removed;
1252     uint _humongous_regions_removed;
1253 
1254   public:
1255     G1ReclaimEmptyRegionsClosure(G1CollectedHeap* g1h,
1256                                  FreeRegionList* local_cleanup_list) :
1257       _g1h(g1h),
1258       _freed_bytes(0),
1259       _local_cleanup_list(local_cleanup_list),
1260       _old_regions_removed(0),
1261       _humongous_regions_removed(0) { }
1262 
1263     size_t freed_bytes() { return _freed_bytes; }
1264     const uint old_regions_removed() { return _old_regions_removed; }
1265     const uint humongous_regions_removed() { return _humongous_regions_removed; }
1266 
1267     bool do_heap_region(HeapRegion *hr) {
1268       if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
1269         _freed_bytes += hr->used();
1270         hr->set_containing_set(NULL);
1271         if (hr->is_humongous()) {
1272           _humongous_regions_removed++;
1273           _g1h->free_humongous_region(hr, _local_cleanup_list);
1274         } else {
1275           _old_regions_removed++;
1276           _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */);
1277         }
1278         hr->clear_cardtable();
1279         _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
1280         log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
1281       }
1282 
1283       return false;
1284     }
1285   };
1286 
1287   G1CollectedHeap* _g1h;
1288   FreeRegionList* _cleanup_list;
1289   HeapRegionClaimer _hrclaimer;
1290 
1291 public:
1292   G1ReclaimEmptyRegionsTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1293     AbstractGangTask("G1 Cleanup"),
1294     _g1h(g1h),
1295     _cleanup_list(cleanup_list),
1296     _hrclaimer(n_workers) {
1297   }
1298 
1299   void work(uint worker_id) {
1300     FreeRegionList local_cleanup_list("Local Cleanup List");
1301     G1ReclaimEmptyRegionsClosure cl(_g1h, &local_cleanup_list);
1302     _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
1303     assert(cl.is_complete(), "Shouldn't have aborted!");
1304 
1305     // Now update the old/humongous region sets
1306     _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
1307     {
1308       MutexLocker x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1309       _g1h->decrement_summary_bytes(cl.freed_bytes());
1310 
1311       _cleanup_list->add_ordered(&local_cleanup_list);
1312       assert(local_cleanup_list.is_empty(), "post-condition");
1313     }
1314   }
1315 };
1316 
1317 void G1ConcurrentMark::reclaim_empty_regions() {
1318   WorkGang* workers = _g1h->workers();
1319   FreeRegionList empty_regions_list("Empty Regions After Mark List");
1320 
1321   G1ReclaimEmptyRegionsTask cl(_g1h, &empty_regions_list, workers->active_workers());
1322   workers->run_task(&cl);
1323 
1324   if (!empty_regions_list.is_empty()) {
1325     log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
1326     // Now print the empty regions list.
1327     G1HRPrinter* hrp = _g1h->hr_printer();
1328     if (hrp->is_active()) {
1329       FreeRegionListIterator iter(&empty_regions_list);
1330       while (iter.more_available()) {
1331         HeapRegion* hr = iter.get_next();
1332         hrp->cleanup(hr);
1333       }
1334     }
1335     // And actually make them available.
1336     _g1h->prepend_to_freelist(&empty_regions_list);
1337   }
1338 }
1339 
1340 void G1ConcurrentMark::compute_new_sizes() {
1341   MetaspaceGC::compute_new_size();
1342 
1343   // Cleanup will have freed any regions completely full of garbage.
1344   // Update the soft reference policy with the new heap occupancy.
1345   Universe::update_heap_info_at_gc();
1346 
1347   // We reclaimed old regions so we should calculate the sizes to make
1348   // sure we update the old gen/space data.
1349   _g1h->g1mm()->update_sizes();
1350 }
1351 
1352 void G1ConcurrentMark::cleanup() {
1353   assert_at_safepoint_on_vm_thread();
1354 
1355   // If a full collection has happened, we shouldn't do this.
1356   if (has_aborted()) {
1357     return;
1358   }
1359 
1360   G1Policy* policy = _g1h->policy();
1361   policy->record_concurrent_mark_cleanup_start();
1362 
1363   double start = os::elapsedTime();
1364 
1365   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup before");
1366 
1367   {
1368     GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1369     G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
1370     _g1h->heap_region_iterate(&cl);
1371   }
1372 
1373   if (log_is_enabled(Trace, gc, liveness)) {
1374     G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
1375     _g1h->heap_region_iterate(&cl);
1376   }
1377 
1378   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");
1379 
1380   // We need to make this be a "collection" so any collection pause that
1381   // races with it goes around and waits for Cleanup to finish.
1382   _g1h->increment_total_collections();
1383 
1384   // Local statistics
1385   double recent_cleanup_time = (os::elapsedTime() - start);
1386   _total_cleanup_time += recent_cleanup_time;
1387   _cleanup_times.add(recent_cleanup_time);
1388 
1389   {
1390     GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1391     policy->record_concurrent_mark_cleanup_end();
1392   }
1393 }
1394 
1395 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1396 // Uses the G1CMTask associated with a worker thread (for serial reference
1397 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1398 // trace referent objects.
1399 //
1400 // Using the G1CMTask and embedded local queues avoids having the worker
1401 // threads operating on the global mark stack. This reduces the risk
1402 // of overflowing the stack - which we would rather avoid at this late
1403 // stage. Also using the tasks' local queues removes the potential
1404 // for the workers to interfere with each other, which could occur if
1405 // they all operated on the global stack.
1406 
1407 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1408   G1ConcurrentMark* _cm;
1409   G1CMTask*         _task;
1410   uint              _ref_counter_limit;
1411   uint              _ref_counter;
1412   bool              _is_serial;
1413 public:
1414   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1415     _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1416     _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1417     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1418   }
1419 
1420   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1421   virtual void do_oop(      oop* p) { do_oop_work(p); }
1422 
1423   template <class T> void do_oop_work(T* p) {
1424     if (_cm->has_overflown()) {
1425       return;
1426     }
1427     if (!_task->deal_with_reference(p)) {
1428       // We did not add anything to the mark bitmap (or mark stack), so there is
1429       // no point trying to drain it.
1430       return;
1431     }
1432     _ref_counter--;
1433 
1434     if (_ref_counter == 0) {
1435       // We have dealt with _ref_counter_limit references, pushing them
1436       // and objects reachable from them on to the local stack (and
1437       // possibly the global stack). Call G1CMTask::do_marking_step() to
1438       // process these entries.
1439       //
1440       // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1441       // there's nothing more to do (i.e. we're done with the entries that
1442       // were pushed as a result of the G1CMTask::deal_with_reference() calls
1443       // above) or we overflow.
1444       //
1445       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1446       // flag while there may still be some work to do. (See the comment at
1447       // the beginning of G1CMTask::do_marking_step() for those conditions -
1448       // one of which is reaching the specified time target.) It is only
1449       // when G1CMTask::do_marking_step() returns without setting the
1450       // has_aborted() flag that the marking step has completed.
1451       do {
1452         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1453         _task->do_marking_step(mark_step_duration_ms,
1454                                false      /* do_termination */,
1455                                _is_serial);
1456       } while (_task->has_aborted() && !_cm->has_overflown());
1457       _ref_counter = _ref_counter_limit;
1458     }
1459   }
1460 };
1461 
1462 // 'Drain' oop closure used by both serial and parallel reference processing.
1463 // Uses the G1CMTask associated with a given worker thread (for serial
1464 // reference processing the G1CMTask for worker 0 is used). Calls the
1465 // do_marking_step routine, with an unbelievably large timeout value,
1466 // to drain the marking data structures of the remaining entries
1467 // added by the 'keep alive' oop closure above.
1468 
1469 class G1CMDrainMarkingStackClosure : public VoidClosure {
1470   G1ConcurrentMark* _cm;
1471   G1CMTask*         _task;
1472   bool              _is_serial;
1473  public:
1474   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1475     _cm(cm), _task(task), _is_serial(is_serial) {
1476     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1477   }
1478 
1479   void do_void() {
1480     do {
1481       // We call G1CMTask::do_marking_step() to completely drain the local
1482       // and global marking stacks of entries pushed by the 'keep alive'
1483       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1484       //
1485       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1486       // if there's nothing more to do (i.e. we've completely drained the
1487       // entries that were pushed as a result of applying the 'keep alive'
1488       // closure to the entries on the discovered ref lists) or we overflow
1489       // the global marking stack.
1490       //
1491       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1492       // flag while there may still be some work to do. (See the comment at
1493       // the beginning of G1CMTask::do_marking_step() for those conditions -
1494       // one of which is reaching the specified time target.) It is only
1495       // when G1CMTask::do_marking_step() returns without setting the
1496       // has_aborted() flag that the marking step has completed.
1497 
1498       _task->do_marking_step(1000000000.0 /* something very large */,
1499                              true         /* do_termination */,
1500                              _is_serial);
1501     } while (_task->has_aborted() && !_cm->has_overflown());
1502   }
1503 };
1504 
1505 // Implementation of AbstractRefProcTaskExecutor for parallel
1506 // reference processing at the end of G1 concurrent marking
1507 
1508 class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
1509 private:
1510   G1CollectedHeap*  _g1h;
1511   G1ConcurrentMark* _cm;
1512   WorkGang*         _workers;
1513   uint              _active_workers;
1514 
1515 public:
1516   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1517                           G1ConcurrentMark* cm,
1518                           WorkGang* workers,
1519                           uint n_workers) :
1520     _g1h(g1h), _cm(cm),
1521     _workers(workers), _active_workers(n_workers) { }
1522 
1523   virtual void execute(ProcessTask& task, uint ergo_workers);
1524 };
1525 
1526 class G1CMRefProcTaskProxy : public AbstractGangTask {
1527   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1528   ProcessTask&      _proc_task;
1529   G1CollectedHeap*  _g1h;
1530   G1ConcurrentMark* _cm;
1531 
1532 public:
1533   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1534                        G1CollectedHeap* g1h,
1535                        G1ConcurrentMark* cm) :
1536     AbstractGangTask("Process reference objects in parallel"),
1537     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1538     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1539     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1540   }
1541 
1542   virtual void work(uint worker_id) {
1543     ResourceMark rm;
1544     HandleMark hm;
1545     G1CMTask* task = _cm->task(worker_id);
1546     G1CMIsAliveClosure g1_is_alive(_g1h);
1547     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1548     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1549 
1550     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1551   }
1552 };
1553 
1554 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task, uint ergo_workers) {
1555   assert(_workers != NULL, "Need parallel worker threads.");
1556   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1557   assert(_workers->active_workers() >= ergo_workers,
1558          "Ergonomically chosen workers(%u) should be less than or equal to active workers(%u)",
1559          ergo_workers, _workers->active_workers());
1560 
1561   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1562 
1563   // We need to reset the concurrency level before each
1564   // proxy task execution, so that the termination protocol
1565   // and overflow handling in G1CMTask::do_marking_step() knows
1566   // how many workers to wait for.
1567   _cm->set_concurrency(ergo_workers);
1568   _workers->run_task(&proc_task_proxy, ergo_workers);
1569 }
1570 
1571 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
1572   ResourceMark rm;
1573   HandleMark   hm;
1574 
1575   // Is alive closure.
1576   G1CMIsAliveClosure g1_is_alive(_g1h);
1577 
1578   // Inner scope to exclude the cleaning of the string table
1579   // from the displayed time.
1580   {
1581     GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1582 
1583     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1584 
1585     // See the comment in G1CollectedHeap::ref_processing_init()
1586     // about how reference processing currently works in G1.
1587 
1588     // Set the soft reference policy
1589     rp->setup_policy(clear_all_soft_refs);
1590     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1591 
1592     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1593     // in serial reference processing. Note these closures are also
1594     // used for serially processing (by the current thread) the
1595     // JNI references during parallel reference processing.
1596     //
1597     // These closures do not need to synchronize with the worker
1598     // threads involved in parallel reference processing as these
1599     // instances are executed serially by the current thread (e.g.
1600     // reference processing is not multi-threaded and is thus
1601     // performed by the current thread instead of a gang worker).
1602     //
1603     // The gang tasks involved in parallel reference processing create
1604     // their own instances of these closures, which do their own
1605     // synchronization among themselves.
1606     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1607     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1608 
1609     // We need at least one active thread. If reference processing
1610     // is not multi-threaded we use the current (VMThread) thread,
1611     // otherwise we use the work gang from the G1CollectedHeap and
1612     // we utilize all the worker threads we can.
1613     bool processing_is_mt = rp->processing_is_mt();
1614     uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
1615     active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
1616 
1617     // Parallel processing task executor.
1618     G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
1619                                               _g1h->workers(), active_workers);
1620     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1621 
1622     // Set the concurrency level. The phase was already set prior to
1623     // executing the remark task.
1624     set_concurrency(active_workers);
1625 
1626     // Set the degree of MT processing here.  If the discovery was done MT,
1627     // the number of threads involved during discovery could differ from
1628     // the number of active workers.  This is OK as long as the discovered
1629     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1630     rp->set_active_mt_degree(active_workers);
1631 
1632     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1633 
1634     // Process the weak references.
1635     const ReferenceProcessorStats& stats =
1636         rp->process_discovered_references(&g1_is_alive,
1637                                           &g1_keep_alive,
1638                                           &g1_drain_mark_stack,
1639                                           executor,
1640                                           &pt);
1641     _gc_tracer_cm->report_gc_reference_stats(stats);
1642     pt.print_all_references();
1643 
1644     // The do_oop work routines of the keep_alive and drain_marking_stack
1645     // oop closures will set the has_overflown flag if we overflow the
1646     // global marking stack.
1647 
1648     assert(has_overflown() || _global_mark_stack.is_empty(),
1649            "Mark stack should be empty (unless it has overflown)");
1650 
1651     assert(rp->num_queues() == active_workers, "why not");
1652 
1653     rp->verify_no_references_recorded();
1654     assert(!rp->discovery_enabled(), "Post condition");
1655   }
1656 
1657   if (has_overflown()) {
1658     // We cannot trust g1_is_alive and the contents of the heap if the marking stack
1659     // overflowed while processing references. Exit the VM.
1660     fatal("Overflow during reference processing, can not continue. Please "
1661           "increase MarkStackSizeMax (current value: " SIZE_FORMAT ") and "
1662           "restart.", MarkStackSizeMax);
1663     return;
1664   }
1665 
1666   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1667 
1668   {
1669     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1670     WeakProcessor::weak_oops_do(_g1h->workers(), &g1_is_alive, &do_nothing_cl, 1);
1671   }
1672 
1673   // Unload Klasses, String, Code Cache, etc.
1674   if (ClassUnloadingWithConcurrentMark) {
1675     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1676     bool purged_classes = SystemDictionary::do_unloading(_gc_timer_cm);
1677     _g1h->complete_cleaning(&g1_is_alive, purged_classes);
1678   } else if (StringDedup::is_enabled()) {
1679     GCTraceTime(Debug, gc, phases) debug("String Deduplication", _gc_timer_cm);
1680     _g1h->string_dedup_cleaning(&g1_is_alive, NULL);
1681   }
1682 }
1683 
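     // Yield closure used during reference precleaning. It requests an early
     // return once concurrent marking has been aborted, and otherwise performs
     // the regular yield check for fine-grained yielding.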
1684 class G1PrecleanYieldClosure : public YieldClosure {
1685   G1ConcurrentMark* _cm;
1686 
1687 public:
1688   G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1689 
1690   virtual bool should_return() {
1691     return _cm->has_aborted();
1692   }
1693 
1694   virtual bool should_return_fine_grain() {
1695     _cm->do_yield_check();
1696     return _cm->has_aborted();
1697   }
1698 };
1699 
1700 void G1ConcurrentMark::preclean() {
1701   assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1702 
1703   SuspendibleThreadSetJoiner joiner;
1704 
1705   G1CMKeepAliveAndDrainClosure keep_alive(this, task(0), true /* is_serial */);
1706   G1CMDrainMarkingStackClosure drain_mark_stack(this, task(0), true /* is_serial */);
1707 
1708   set_concurrency_and_phase(1, true);
1709 
1710   G1PrecleanYieldClosure yield_cl(this);
1711 
1712   ReferenceProcessor* rp = _g1h->ref_processor_cm();
1713   // Precleaning is single threaded. Temporarily disable MT discovery.
1714   ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1715   rp->preclean_discovered_references(rp->is_alive_non_header(),
1716                                      &keep_alive,
1717                                      &drain_mark_stack,
1718                                      &yield_cl,
1719                                      _gc_timer_cm);
1720 }
1721 
1722 // When sampling object counts, we have already swapped the mark bitmaps, so we need
1723 // to use the prev bitmap to determine liveness.
1724 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1725   G1CollectedHeap* _g1h;
1726 public:
1727   G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) { }
1728 
1729   bool do_object_b(oop obj) {
1730     HeapWord* addr = (HeapWord*)obj;
1731     return addr != NULL &&
1732            (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_dead(obj));
1733   }
1734 };
1735 
1736 void G1ConcurrentMark::report_object_count(bool mark_completed) {
1737   // Depending on whether the marking has completed, liveness needs to be determined
1738   // using either the next or the prev bitmap.
1739   if (mark_completed) {
1740     G1ObjectCountIsAliveClosure is_alive(_g1h);
1741     _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1742   } else {
1743     G1CMIsAliveClosure is_alive(_g1h);
1744     _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1745   }
1746 }
1747 
1748 
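     // Swap the roles of the two marking bitmaps: the just-completed next bitmap
     // becomes the prev bitmap, and the old prev bitmap becomes the next bitmap,
     // which must be cleared before the next marking cycle starts.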
1749 void G1ConcurrentMark::swap_mark_bitmaps() {
1750   G1CMBitMap* temp = _prev_mark_bitmap;
1751   _prev_mark_bitmap = _next_mark_bitmap;
1752   _next_mark_bitmap = temp;
1753   _g1h->collector_state()->set_clearing_next_bitmap(true);
1754 }
1755 
1756 // Closure for marking entries in SATB buffers.
1757 class G1CMSATBBufferClosure : public SATBBufferClosure {
1758 private:
1759   G1CMTask* _task;
1760   G1CollectedHeap* _g1h;
1761 
1762   // This is very similar to G1CMTask::deal_with_reference, but with
1763   // more relaxed requirements for the argument, so this must be more
1764   // circumspect about treating the argument as an object.
1765   void do_entry(void* entry) const {
1766     _task->increment_refs_reached();
1767     oop const obj = static_cast<oop>(entry);
1768     _task->make_reference_grey(obj);
1769   }
1770 
1771 public:
1772   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1773     : _task(task), _g1h(g1h) { }
1774 
1775   virtual void do_buffer(void** buffer, size_t size) {
1776     for (size_t i = 0; i < size; ++i) {
1777       do_entry(buffer[i]);
1778     }
1779   }
1780 };
1781 
1782 class G1RemarkThreadsClosure : public ThreadClosure {
1783   G1CMSATBBufferClosure _cm_satb_cl;
1784   G1CMOopClosure _cm_cl;
1785   MarkingCodeBlobClosure _code_cl;
1786   uintx _claim_token;
1787 
1788  public:
1789   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1790     _cm_satb_cl(task, g1h),
1791     _cm_cl(g1h, task),
1792     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1793     _claim_token(Threads::thread_claim_token()) {}
1794 
1795   void do_thread(Thread* thread) {
1796     if (thread->claim_threads_do(true, _claim_token)) {
1797       SATBMarkQueue& queue = G1ThreadLocalData::satb_mark_queue(thread);
1798       queue.apply_closure_and_empty(&_cm_satb_cl);
1799       if (thread->is_Java_thread()) {
1800         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1801         // however, oops reachable from nmethods have very complex lifecycles:
1802         // * Alive if on the stack of an executing method
1803         // * Weakly reachable otherwise
1804         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1805         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1806         JavaThread* jt = (JavaThread*)thread;
1807         jt->nmethods_do(&_code_cl);
1808       }
1809     }
1810   }
1811 };
1812 
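     // Parallel remark task. Each worker first processes the SATB buffers and
     // nmethod roots of the threads it claims (G1RemarkThreadsClosure), then
     // finishes marking by repeatedly calling do_marking_step() until it either
     // completes or the global mark stack overflows.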
1813 class G1CMRemarkTask : public AbstractGangTask {
1814   G1ConcurrentMark* _cm;
1815 public:
1816   void work(uint worker_id) {
1817     G1CMTask* task = _cm->task(worker_id);
1818     task->record_start_time();
1819     {
1820       ResourceMark rm;
1821       HandleMark hm;
1822 
1823       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1824       Threads::threads_do(&threads_f);
1825     }
1826 
1827     do {
1828       task->do_marking_step(1000000000.0 /* something very large */,
1829                             true         /* do_termination       */,
1830                             false        /* is_serial            */);
1831     } while (task->has_aborted() && !_cm->has_overflown());
1832     // If we overflow, then we do not want to restart. We instead
1833     // want to abort remark and do concurrent marking again.
1834     task->record_end_time();
1835   }
1836 
1837   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1838     AbstractGangTask("Par Remark"), _cm(cm) {
1839     _cm->terminator()->reset_for_reuse(active_workers);
1840   }
1841 };
1842 
1843 void G1ConcurrentMark::finalize_marking() {
1844   ResourceMark rm;
1845   HandleMark   hm;
1846 
1847   _g1h->ensure_parsability(false);
1848 
1849   // this is remark, so we'll use up all active threads
1850   uint active_workers = _g1h->workers()->active_workers();
1851   set_concurrency_and_phase(active_workers, false /* concurrent */);
1852   // Leave _parallel_marking_threads at its
1853   // value originally calculated in the G1ConcurrentMark
1854   // constructor and pass values of the active workers
1855   // through the gang in the task.
1856 
1857   {
1858     StrongRootsScope srs(active_workers);
1859 
1860     G1CMRemarkTask remarkTask(this, active_workers);
1861     // We will start all available threads, even if we decide that the
1862     // active_workers will be fewer. The extra ones will just bail out
1863     // immediately.
1864     _g1h->workers()->run_task(&remarkTask);
1865   }
1866 
1867   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1868   guarantee(has_overflown() ||
1869             satb_mq_set.completed_buffers_num() == 0,
1870             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1871             BOOL_TO_STR(has_overflown()),
1872             satb_mq_set.completed_buffers_num());
1873 
1874   print_stats();
1875 }
1876 
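     // Flush the per-task mark stats caches and log the aggregate hit/miss ratio.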
1877 void G1ConcurrentMark::flush_all_task_caches() {
1878   size_t hits = 0;
1879   size_t misses = 0;
1880   for (uint i = 0; i < _max_num_tasks; i++) {
1881     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1882     hits += stats.first;
1883     misses += stats.second;
1884   }
1885   size_t sum = hits + misses;
1886   log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
1887                        hits, misses, percent_of(hits, sum));
1888 }
1889 
1890 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1891   _prev_mark_bitmap->clear_range(mr);
1892 }
1893 
1894 HeapRegion*
1895 G1ConcurrentMark::claim_region(uint worker_id) {
1896   // "checkpoint" the finger
1897   HeapWord* finger = _finger;
1898 
1899   while (finger < _heap.end()) {
1900     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1901 
1902     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1903     // Make sure that the reads below do not float before loading curr_region.
1904     OrderAccess::loadload();
1905     // The above heap_region_containing() may return NULL as we always scan and
1906     // claim regions until the end of the heap. In this case, just jump to the next region.
1907     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1908 
1909     // Is the gap between reading the finger and doing the CAS too long?
1910     HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
1911     if (res == finger && curr_region != NULL) {
1912       // we succeeded
1913       HeapWord*   bottom        = curr_region->bottom();
1914       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1915 
1916       // Notice that _finger == end cannot be guaranteed here since
1917       // someone else might have moved the finger even further.
1918       assert(_finger >= end, "the finger should have moved forward");
1919 
1920       if (limit > bottom) {
1921         return curr_region;
1922       } else {
1923         assert(limit == bottom,
1924                "the region limit should be at bottom");
1925         // we return NULL and the caller should try calling
1926         // claim_region() again.
1927         return NULL;
1928       }
1929     } else {
1930       assert(_finger > finger, "the finger should have moved forward");
1931       // read it again
1932       finger = _finger;
1933     }
1934   }
1935 
1936   return NULL;
1937 }
1938 
1939 #ifndef PRODUCT
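     // Functor applied to global mark stack and task queue entries in non-product
     // builds to verify that no entry refers to an object in the (optional)
     // collection set.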
1940 class VerifyNoCSetOops {
1941   G1CollectedHeap* _g1h;
1942   const char* _phase;
1943   int _info;
1944 
1945 public:
1946   VerifyNoCSetOops(const char* phase, int info = -1) :
1947     _g1h(G1CollectedHeap::heap()),
1948     _phase(phase),
1949     _info(info)
1950   { }
1951 
1952   void operator()(G1TaskQueueEntry task_entry) const {
1953     if (task_entry.is_array_slice()) {
1954       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1955       return;
1956     }
1957     guarantee(oopDesc::is_oop(task_entry.obj()),
1958               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1959               p2i(task_entry.obj()), _phase, _info);
1960     HeapRegion* r = _g1h->heap_region_containing(task_entry.obj());
1961     guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1962               "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1963               p2i(task_entry.obj()), _phase, _info, r->hrm_index());
1964   }
1965 };
1966 
1967 void G1ConcurrentMark::verify_no_collection_set_oops() {
1968   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1969   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
1970     return;
1971   }
1972 
1973   // Verify entries on the global mark stack
1974   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1975 
1976   // Verify entries on the task queues
1977   for (uint i = 0; i < _max_num_tasks; ++i) {
1978     G1CMTaskQueue* queue = _task_queues->queue(i);
1979     queue->iterate(VerifyNoCSetOops("Queue", i));
1980   }
1981 
1982   // Verify the global finger
1983   HeapWord* global_finger = finger();
1984   if (global_finger != NULL && global_finger < _heap.end()) {
1985     // Since we always iterate over all regions, we might get a NULL HeapRegion
1986     // here.
1987     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1988     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1989               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1990               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1991   }
1992 
1993   // Verify the task fingers
1994   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1995   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1996     G1CMTask* task = _tasks[i];
1997     HeapWord* task_finger = task->finger();
1998     if (task_finger != NULL && task_finger < _heap.end()) {
1999       // See above note on the global finger verification.
2000       HeapRegion* r = _g1h->heap_region_containing(task_finger);
2001       guarantee(r == NULL || task_finger == r->bottom() ||
2002                 !r->in_collection_set() || !r->has_index_in_opt_cset(),
2003                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2004                 p2i(task_finger), HR_FORMAT_PARAMS(r));
2005     }
2006   }
2007 }
2008 #endif // PRODUCT
2009 
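     // Rebuild the remembered sets concurrently on the concurrent marking workers,
     // offsetting worker ids by _worker_id_offset.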
2010 void G1ConcurrentMark::rebuild_rem_set_concurrently() {
2011   _g1h->rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset);
2012 }
2013 
2014 void G1ConcurrentMark::print_stats() {
2015   if (!log_is_enabled(Debug, gc, stats)) {
2016     return;
2017   }
2018   log_debug(gc, stats)("---------------------------------------------------------------------");
2019   for (size_t i = 0; i < _num_active_tasks; ++i) {
2020     _tasks[i]->print_stats();
2021     log_debug(gc, stats)("---------------------------------------------------------------------");
2022   }
2023 }
2024 
2025 void G1ConcurrentMark::concurrent_cycle_abort() {
2026   if (!cm_thread()->during_cycle() || _has_aborted) {
2027     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2028     return;
2029   }
2030 
2031   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2032   // concurrent bitmap clearing.
2033   {
2034     GCTraceTime(Debug, gc) debug("Clear Next Bitmap");
2035     clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
2036   }
2037   // Note we cannot clear the previous marking bitmap here
2038   // since VerifyDuringGC verifies the objects marked during
2039   // a full GC against the previous bitmap.
2040 
2041   // Empty mark stack
2042   reset_marking_for_restart();
2043   for (uint i = 0; i < _max_num_tasks; ++i) {
2044     _tasks[i]->clear_region_fields();
2045   }
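       // Release any workers waiting at the overflow barrier syncs so that they
       // do not block after the abort.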
2046   _first_overflow_barrier_sync.abort();
2047   _second_overflow_barrier_sync.abort();
2048   _has_aborted = true;
2049 
2050   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2051   satb_mq_set.abandon_partial_marking();
2052   // This can be called either during or outside of marking; we'll read
2053   // the expected_active value from the SATB queue set.
2054   satb_mq_set.set_active_all_threads(
2055                                  false, /* new active value */
2056                                  satb_mq_set.is_active() /* expected_active */);
2057 }
2058 
2059 static void print_ms_time_info(const char* prefix, const char* name,
2060                                NumberSeq& ns) {
2061   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2062                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2063   if (ns.num() > 0) {
2064     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2065                            prefix, ns.sd(), ns.maximum());
2066   }
2067 }
2068 
2069 void G1ConcurrentMark::print_summary_info() {
2070   Log(gc, marking) log;
2071   if (!log.is_trace()) {
2072     return;
2073   }
2074 
2075   log.trace(" Concurrent marking:");
2076   print_ms_time_info("  ", "init marks", _init_times);
2077   print_ms_time_info("  ", "remarks", _remark_times);
2078   {
2079     print_ms_time_info("     ", "final marks", _remark_mark_times);
2080     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2081 
2082   }
2083   print_ms_time_info("  ", "cleanups", _cleanup_times);
2084   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2085             _total_cleanup_time, (_cleanup_times.num() > 0 ? _total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2086   log.trace("  Total stop_world time = %8.2f s.",
2087             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2088   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2089             cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
2090 }
2091 
2092 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2093   _concurrent_workers->print_worker_threads_on(st);
2094 }
2095 
2096 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2097   _concurrent_workers->threads_do(tc);
2098 }
2099 
2100 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2101   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2102                p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
2103   _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
2104   _next_mark_bitmap->print_on_error(st, " Next Bits: ");
2105 }
2106 
2107 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2108   ReferenceProcessor* result = g1h->ref_processor_cm();
2109   assert(result != NULL, "CM reference processor should not be NULL");
2110   return result;
2111 }
2112 
2113 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2114                                G1CMTask* task)
2115   : MetadataVisitingOopIterateClosure(get_cm_oop_closure_ref_processor(g1h)),
2116     _g1h(g1h), _task(task)
2117 { }
2118 
2119 void G1CMTask::setup_for_region(HeapRegion* hr) {
2120   assert(hr != NULL,
2121         "claim_region() should have filtered out NULL regions");
2122   _curr_region  = hr;
2123   _finger       = hr->bottom();
2124   update_region_limit();
2125 }
2126 
2127 void G1CMTask::update_region_limit() {
2128   HeapRegion* hr            = _curr_region;
2129   HeapWord* bottom          = hr->bottom();
2130   HeapWord* limit           = hr->next_top_at_mark_start();
2131 
2132   if (limit == bottom) {
2133     // The region was collected underneath our feet.
2134     // We set the finger to bottom to ensure that the bitmap
2135     // iteration that will follow this will not do anything.
2136     // (this is not a condition that holds when we set the region up,
2137     // as the region is not supposed to be empty in the first place)
2138     _finger = bottom;
2139   } else if (limit >= _region_limit) {
2140     assert(limit >= _finger, "peace of mind");
2141   } else {
2142     assert(limit < _region_limit, "only way to get here");
2143     // This can happen under some pretty unusual circumstances.  An
2144     // evacuation pause empties the region underneath our feet (NTAMS
2145     // at bottom). We then do some allocation in the region (NTAMS
2146     // stays at bottom), followed by the region being used as a GC
2147     // alloc region (NTAMS will move to top() and the objects
2148     // originally below it will be grayed). All objects now marked in
2149     // the region are explicitly grayed, if below the global finger,
2150     // and in fact we do not need to scan anything else. So, we simply
2151     // set _finger to limit to ensure that the bitmap iteration
2152     // doesn't do anything.
2153     _finger = limit;
2154   }
2155 
2156   _region_limit = limit;
2157 }
2158 
2159 void G1CMTask::giveup_current_region() {
2160   assert(_curr_region != NULL, "invariant");
2161   clear_region_fields();
2162 }
2163 
2164 void G1CMTask::clear_region_fields() {
2165   // Values for these three fields that indicate that we're not
2166   // holding on to a region.
2167   _curr_region   = NULL;
2168   _finger        = NULL;
2169   _region_limit  = NULL;
2170 }
2171 
2172 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2173   if (cm_oop_closure == NULL) {
2174     assert(_cm_oop_closure != NULL, "invariant");
2175   } else {
2176     assert(_cm_oop_closure == NULL, "invariant");
2177   }
2178   _cm_oop_closure = cm_oop_closure;
2179 }
2180 
2181 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2182   guarantee(next_mark_bitmap != NULL, "invariant");
2183   _next_mark_bitmap              = next_mark_bitmap;
2184   clear_region_fields();
2185 
2186   _calls                         = 0;
2187   _elapsed_time_ms               = 0.0;
2188   _termination_time_ms           = 0.0;
2189   _termination_start_time_ms     = 0.0;
2190 
2191   _mark_stats_cache.reset();
2192 }
2193 
2194 bool G1CMTask::should_exit_termination() {
2195   if (!regular_clock_call()) {
2196     return true;
2197   }
2198 
2199   // This is called when we are in the termination protocol. We should
2200   // quit if, for some reason, this task wants to abort or the global
2201   // stack is not empty (this means that we can get work from it).
2202   return !_cm->mark_stack_empty() || has_aborted();
2203 }
2204 
2205 void G1CMTask::reached_limit() {
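       // One of the work-based limits (words scanned or refs reached) has been
       // hit; give the regular clock a chance to run and possibly abort this step.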
2206   assert(_words_scanned >= _words_scanned_limit ||
2207          _refs_reached >= _refs_reached_limit ,
2208          "shouldn't have been called otherwise");
2209   abort_marking_if_regular_check_fail();
2210 }
2211 
2212 bool G1CMTask::regular_clock_call() {
2213   if (has_aborted()) {
2214     return false;
2215   }
2216 
2217   // First, we need to recalculate the words scanned and refs reached
2218   // limits for the next clock call.
2219   recalculate_limits();
2220 
2221   // During the regular clock call we do the following
2222 
2223   // (1) If an overflow has been flagged, then we abort.
2224   if (_cm->has_overflown()) {
2225     return false;
2226   }
2227 
2228   // If we are not concurrent (i.e. we're doing remark) we don't need
2229   // to check anything else. The other steps are only needed during
2230   // the concurrent marking phase.
2231   if (!_cm->concurrent()) {
2232     return true;
2233   }
2234 
2235   // (2) If marking has been aborted for Full GC, then we also abort.
2236   if (_cm->has_aborted()) {
2237     return false;
2238   }
2239 
2240   double curr_time_ms = os::elapsedVTime() * 1000.0;
2241 
2242   // (3) We check whether we should yield. If we have to, then we abort.
2243   if (SuspendibleThreadSet::should_yield()) {
2244     // We should yield. To do this we abort the task. The caller is
2245     // responsible for yielding.
2246     return false;
2247   }
2248 
2249   // (4) We check whether we've reached our time quota. If we have,
2250   // then we abort.
2251   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2252   if (elapsed_time_ms > _time_target_ms) {
2253     _has_timed_out = true;
2254     return false;
2255   }
2256 
2257   // (5) Finally, we check whether there are enough completed SATB
2258   // buffers available for processing. If there are, we abort.
2259   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2260   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2261     // we do need to process SATB buffers, we'll abort and restart
2262     // the marking task to do so
2263     return false;
2264   }
2265   return true;
2266 }
2267 
2268 void G1CMTask::recalculate_limits() {
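       // Allow another full period of scanning work (words and refs) before the
       // regular clock is triggered again.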
2269   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2270   _words_scanned_limit      = _real_words_scanned_limit;
2271 
2272   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2273   _refs_reached_limit       = _real_refs_reached_limit;
2274 }
2275 
2276 void G1CMTask::decrease_limits() {
2277   // This is called when we believe that we're going to do an infrequent
2278   // operation which will increase the per byte scanned cost (i.e. move
2279   // entries to/from the global stack). It basically tries to decrease the
2280   // scanning limit so that the clock is called earlier.
2281 
2282   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2283   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2284 }
2285 
2286 void G1CMTask::move_entries_to_global_stack() {
2287   // Local array where we'll store the entries that will be popped
2288   // from the local queue.
2289   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2290 
2291   size_t n = 0;
2292   G1TaskQueueEntry task_entry;
2293   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2294     buffer[n] = task_entry;
2295     ++n;
2296   }
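       // If the chunk is not full, terminate it with an empty entry so that the
       // consumer (get_entries_from_global_stack()) knows where the entries end.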
2297   if (n < G1CMMarkStack::EntriesPerChunk) {
2298     buffer[n] = G1TaskQueueEntry();
2299   }
2300 
2301   if (n > 0) {
2302     if (!_cm->mark_stack_push(buffer)) {
2303       set_has_aborted();
2304     }
2305   }
2306 
2307   // This operation was quite expensive, so decrease the limits.
2308   decrease_limits();
2309 }
2310 
2311 bool G1CMTask::get_entries_from_global_stack() {
2312   // Local array where we'll store the entries that will be popped
2313   // from the global stack.
2314   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2315 
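       // Try to pop a whole chunk of entries from the global mark stack; give up
       // if there is nothing to pop.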
2316   if (!_cm->mark_stack_pop(buffer)) {
2317     return false;
2318   }
2319 
2320   // We did actually pop at least one entry.
2321   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2322     G1TaskQueueEntry task_entry = buffer[i];
2323     if (task_entry.is_null()) {
2324       break;
2325     }
2326     assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2327     bool success = _task_queue->push(task_entry);
2328     // We only call this when the local queue is empty or under a
2329     // given target limit. So, we do not expect this push to fail.
2330     assert(success, "invariant");
2331   }
2332 
2333   // This operation was quite expensive, so decrease the limits
2334   decrease_limits();
2335   return true;
2336 }
2337 
2338 void G1CMTask::drain_local_queue(bool partially) {
2339   if (has_aborted()) {
2340     return;
2341   }
2342 
2343   // Decide what the target size is, depending whether we're going to
2344   // drain it partially (so that other tasks can steal if they run out
2345   // of things to do) or totally (at the very end).
2346   size_t target_size;
2347   if (partially) {
2348     target_size = MIN2((size_t)_task_queue->max_elems()/3, (size_t)GCDrainStackTargetSize);
2349   } else {
2350     target_size = 0;
2351   }
2352 
2353   if (_task_queue->size() > target_size) {
2354     G1TaskQueueEntry entry;
2355     bool ret = _task_queue->pop_local(entry);
2356     while (ret) {
2357       scan_task_entry(entry);
2358       if (_task_queue->size() <= target_size || has_aborted()) {
2359         ret = false;
2360       } else {
2361         ret = _task_queue->pop_local(entry);
2362       }
2363     }
2364   }
2365 }
2366 
2367 void G1CMTask::drain_global_stack(bool partially) {
2368   if (has_aborted()) {
2369     return;
2370   }
2371 
2372   // We have a policy to drain the local queue before we attempt to
2373   // drain the global stack.
2374   assert(partially || _task_queue->size() == 0, "invariant");
2375 
2376   // Decide what the target size is, depending whether we're going to
2377   // drain it partially (so that other tasks can steal if they run out
2378   // of things to do) or totally (at the very end).
2379   // Notice that when draining the global mark stack partially, due to the raciness
2380   // of the mark stack size update we might in fact drop below the target. But,
2381   // this is not a problem.
2382   // In case of total draining, we simply process until the global mark stack is
2383   // totally empty, disregarding the size counter.
2384   if (partially) {
2385     size_t const target_size = _cm->partial_mark_stack_size_target();
2386     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2387       if (get_entries_from_global_stack()) {
2388         drain_local_queue(partially);
2389       }
2390     }
2391   } else {
2392     while (!has_aborted() && get_entries_from_global_stack()) {
2393       drain_local_queue(partially);
2394     }
2395   }
2396 }
2397 
2398 // The SATB queue code has several assumptions about whether to call the par or
2399 // non-par versions of the methods. This is why some of the code is
2400 // replicated. We should really get rid of the single-threaded version
2401 // of the code to simplify things.
2402 void G1CMTask::drain_satb_buffers() {
2403   if (has_aborted()) {
2404     return;
2405   }
2406 
2407   // We set this so that the regular clock knows that we're in the
2408   // middle of draining buffers and doesn't set the abort flag when it
2409   // notices that SATB buffers are available for draining. It'd be
2410   // very counterproductive if it did that. :-)
2411   _draining_satb_buffers = true;
2412 
2413   G1CMSATBBufferClosure satb_cl(this, _g1h);
2414   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2415 
2416   // This keeps claiming and applying the closure to completed buffers
2417   // until we run out of buffers or we need to abort.
2418   while (!has_aborted() &&
2419          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2420     abort_marking_if_regular_check_fail();
2421   }
2422 
2423   // Can't assert qset is empty here, even if not aborted.  If concurrent,
2424   // some other thread might be adding to the queue.  If not concurrent,
2425   // some other thread might have won the race for the last buffer, but
2426   // has not yet decremented the count.
2427 
2428   _draining_satb_buffers = false;
2429 
2430   // again, this was a potentially expensive operation, decrease the
2431   // limits to get the regular clock call early
2432   decrease_limits();
2433 }
2434 
2435 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2436   _mark_stats_cache.reset(region_idx);
2437 }
2438 
2439 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2440   return _mark_stats_cache.evict_all();
2441 }
2442 
2443 void G1CMTask::print_stats() {
2444   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2445   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2446                        _elapsed_time_ms, _termination_time_ms);
2447   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2448                        _step_times_ms.num(),
2449                        _step_times_ms.avg(),
2450                        _step_times_ms.sd(),
2451                        _step_times_ms.maximum(),
2452                        _step_times_ms.sum());
2453   size_t const hits = _mark_stats_cache.hits();
2454   size_t const misses = _mark_stats_cache.misses();
2455   log_debug(gc, stats)("  Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f",
2456                        hits, misses, percent_of(hits, hits + misses));
2457 }
2458 
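     // Attempt to steal a single queue entry from another task's queue.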
2459 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2460   return _task_queues->steal(worker_id, task_entry);
2461 }
2462 
2463 /*****************************************************************************
2464 
2465     The do_marking_step(time_target_ms, ...) method is the building
2466     block of the parallel marking framework. It can be called in parallel
2467     with other invocations of do_marking_step() on different tasks
2468     (but only one per task, obviously) and concurrently with the
2469     mutator threads, or during remark, hence it eliminates the need
2470     for two versions of the code. When called during remark, it will
2471     pick up from where the task left off during the concurrent marking
2472     phase. Interestingly, tasks are also claimable during evacuation
2473     pauses, since do_marking_step() ensures that it aborts before
2474     it needs to yield.
2475 
2476     The data structures that it uses to do marking work are the
2477     following:
2478 
2479       (1) Marking Bitmap. If there are gray objects that appear only
2480       on the bitmap (this happens either when dealing with an overflow
2481       or when the initial marking phase has simply marked the roots
2482       and didn't push them on the stack), then tasks claim heap
2483       regions whose bitmap they then scan to find gray objects. A
2484       global finger indicates where the end of the last claimed region
2485       is. A local finger indicates how far into the region a task has
2486       scanned. The two fingers are used to determine how to gray an
2487       object (i.e. whether simply marking it is OK, as it will be
2488       visited by a task in the future, or whether it needs to be also
2489       pushed on a stack).
2490 
2491       (2) Local Queue. The local queue of the task which is accessed
2492       reasonably efficiently by the task. Other tasks can steal from
2493       it when they run out of work. Throughout the marking phase, a
2494       task attempts to keep its local queue short but not totally
2495       empty, so that entries are available for stealing by other
2496       tasks. Only when there is no more work, a task will totally
2497       drain its local queue.
2498 
2499       (3) Global Mark Stack. This handles local queue overflow. During
2500       marking only sets of entries are moved between it and the local
2501       queues, as access to it requires a mutex and more fine-grain
2502       interaction with it which might cause contention. If it
2503       overflows, then the marking phase should restart and iterate
2504       over the bitmap to identify gray objects. Throughout the marking
2505       phase, tasks attempt to keep the global mark stack at a small
2506       length but not totally empty, so that entries are available for
2507       popping by other tasks. Only when there is no more work, tasks
2508       will totally drain the global mark stack.
2509 
2510       (4) SATB Buffer Queue. This is where completed SATB buffers are
2511       made available. Buffers are regularly removed from this queue
2512       and scanned for roots, so that the queue doesn't get too
2513       long. During remark, all completed buffers are processed, as
2514       well as the filled in parts of any uncompleted buffers.
2515 
2516     The do_marking_step() method tries to abort when the time target
2517     has been reached. There are a few other cases when the
2518     do_marking_step() method also aborts:
2519 
2520       (1) When the marking phase has been aborted (after a Full GC).
2521 
2522       (2) When a global overflow (on the global stack) has been
2523       triggered. Before the task aborts, it will actually sync up with
2524       the other tasks to ensure that all the marking data structures
2525       (local queues, stacks, fingers etc.)  are re-initialized so that
2526       when do_marking_step() completes, the marking phase can
2527       immediately restart.
2528 
2529       (3) When enough completed SATB buffers are available. The
2530       do_marking_step() method only tries to drain SATB buffers right
2531       at the beginning. So, if enough buffers are available, the
2532       marking step aborts and the SATB buffers are processed at
2533       the beginning of the next invocation.
2534 
2535       (4) To yield. When we have to yield, we abort and yield
2536       right at the end of do_marking_step(). This saves us a lot
2537       of hassle as, by yielding, we might allow a Full GC. If this
2538       happens, then objects will be compacted underneath our feet, the
2539       heap might shrink, etc. We avoid having to check for this by just
2540       aborting and doing the yield right at the end.
2541 
2542     From the above it follows that the do_marking_step() method should
2543     be called in a loop (or, otherwise, regularly) until it completes.
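
         For example, the remark task and the reference processing closures in
         this file call it in a loop of the following shape (a sketch only; the
         actual time target and flag values vary per caller):

           do {
             task->do_marking_step(target_ms, do_termination, is_serial);
           } while (task->has_aborted() && !cm->has_overflown());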
2544 
2545     If a marking step completes without its has_aborted() flag being
2546     true, it means it has completed the current marking phase (and
2547     also all other marking tasks have done so and have all synced up).
2548 
2549     A method called regular_clock_call() is invoked "regularly" (in
2550     sub ms intervals) throughout marking. It is this clock method that
2551     checks all the abort conditions which were mentioned above and
2552     decides when the task should abort. A work-based scheme is used to
2553     trigger this clock method: when the number of object words the
2554     marking phase has scanned or the number of references the marking
2555     phase has visited reach a given limit. Additional invocations of
2556     the clock method have been planted in a few other strategic places
2557     too. The initial reason for the clock method was to avoid calling
2558     vtime too regularly, as it is quite expensive. So, once it was in
2559     place, it was natural to piggy-back all the other conditions on it
2560     too and not constantly check them throughout the code.
2561 
2562     If do_termination is true then do_marking_step will enter its
2563     termination protocol.
2564 
2565     The value of is_serial must be true when do_marking_step is being
2566     called serially (i.e. by the VMThread) and do_marking_step should
2567     skip any synchronization in the termination and overflow code.
2568     Examples include the serial remark code and the serial reference
2569     processing closures.
2570 
2571     The value of is_serial must be false when do_marking_step is
2572     being called by any of the worker threads in a work gang.
2573     Examples include the concurrent marking code (CMMarkingTask),
2574     the MT remark code, and the MT reference processing closures.
2575 
2576  *****************************************************************************/
2577 
2578 void G1CMTask::do_marking_step(double time_target_ms,
2579                                bool do_termination,
2580                                bool is_serial) {
2581   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2582 
2583   _start_time_ms = os::elapsedVTime() * 1000.0;
2584 
2585   // If do_stealing is true then do_marking_step will attempt to
2586   // steal work from the other G1CMTasks. It only makes sense to
2587   // enable stealing when the termination protocol is enabled
2588   // and do_marking_step() is not being called serially.
2589   bool do_stealing = do_termination && !is_serial;
2590 
2591   double diff_prediction_ms = _g1h->policy()->predictor().get_new_prediction(&_marking_step_diff_ms);
2592   _time_target_ms = time_target_ms - diff_prediction_ms;
2593 
2594   // set up the variables that are used in the work-based scheme to
2595   // call the regular clock method
2596   _words_scanned = 0;
2597   _refs_reached  = 0;
2598   recalculate_limits();
2599 
2600   // clear all flags
2601   clear_has_aborted();
2602   _has_timed_out = false;
2603   _draining_satb_buffers = false;
2604 
2605   ++_calls;
2606 
2607   // Set up the bitmap and oop closures. Anything that uses them is
2608   // eventually called from this method, so it is OK to allocate these
2609   // statically.
2610   G1CMBitMapClosure bitmap_closure(this, _cm);
2611   G1CMOopClosure cm_oop_closure(_g1h, this);
2612   set_cm_oop_closure(&cm_oop_closure);
2613 
2614   if (_cm->has_overflown()) {
2615     // This can happen if the mark stack overflows during a GC pause
2616     // and this task, after a yield point, restarts. We have to abort
2617     // as we need to get into the overflow protocol which happens
2618     // right at the end of this task.
2619     set_has_aborted();
2620   }
2621 
2622   // First drain any available SATB buffers. After this, we will not
2623   // look at SATB buffers before the next invocation of this method.
2624   // If enough completed SATB buffers are queued up, the regular clock
2625   // will abort this task so that it restarts.
2626   drain_satb_buffers();
2627   // ...then partially drain the local queue and the global stack
2628   drain_local_queue(true);
2629   drain_global_stack(true);
2630 
2631   do {
2632     if (!has_aborted() && _curr_region != NULL) {
2633       // This means that we're already holding on to a region.
2634       assert(_finger != NULL, "if region is not NULL, then the finger "
2635              "should not be NULL either");
2636 
2637       // We might have restarted this task after an evacuation pause
2638       // which might have evacuated the region we're holding on to
2639       // underneath our feet. Let's read its limit again to make sure
2640       // that we do not iterate over a region of the heap that
2641       // contains garbage (update_region_limit() will also move
2642       // _finger to the start of the region if it is found empty).
2643       update_region_limit();
2644       // We will start from _finger not from the start of the region,
2645       // as we might be restarting this task after aborting half-way
2646       // through scanning this region. In this case, _finger points to
2647       // the address where we last found a marked object. If this is a
2648       // fresh region, _finger points to start().
2649       MemRegion mr = MemRegion(_finger, _region_limit);
2650 
2651       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2652              "humongous regions should go around loop once only");
2653 
2654       // Some special cases:
2655       // If the memory region is empty, we can just give up the region.
2656       // If the current region is humongous then we only need to check
2657       // the bitmap for the bit associated with the start of the object,
2658       // scan the object if it's live, and give up the region.
2659       // Otherwise, let's iterate over the bitmap of the part of the region
2660       // that is left.
2661       // If the iteration is successful, give up the region.
2662       if (mr.is_empty()) {
2663         giveup_current_region();
2664         abort_marking_if_regular_check_fail();
2665       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2666         if (_next_mark_bitmap->is_marked(mr.start())) {
2667           // The object is marked - apply the closure
2668           bitmap_closure.do_addr(mr.start());
2669         }
2670         // Even if this task aborted while scanning the humongous object
2671         // we can (and should) give up the current region.
2672         giveup_current_region();
2673         abort_marking_if_regular_check_fail();
2674       } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
2675         giveup_current_region();
2676         abort_marking_if_regular_check_fail();
2677       } else {
2678         assert(has_aborted(), "currently the only way to do so");
2679         // The only way to abort the bitmap iteration is to return
2680         // false from the do_bit() method. However, inside the
2681         // do_bit() method we move the _finger to point to the
2682         // object currently being looked at. So, if we bail out, we
2683         // have definitely set _finger to something non-null.
2684         assert(_finger != NULL, "invariant");
2685 
2686         // Region iteration was actually aborted. So now _finger
2687         // points to the address of the object we last scanned. If we
2688         // leave it there, when we restart this task, we will rescan
2689         // the object. It is easy to avoid this. We move the finger by
2690         // enough to point to the next possible object header.
2691         assert(_finger < _region_limit, "invariant");
2692         HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2693         // Check if bitmap iteration was aborted while scanning the last object
2694         if (new_finger >= _region_limit) {
2695           giveup_current_region();
2696         } else {
2697           move_finger_to(new_finger);
2698         }
2699       }
2700     }
2701     // At this point we have either completed iterating over the
2702     // region we were holding on to, or we have aborted.
2703 
2704     // We then partially drain the local queue and the global stack.
2705     // (Do we really need this?)
2706     drain_local_queue(true);
2707     drain_global_stack(true);
2708 
2709     // Read the note on the claim_region() method on why it might
2710     // return NULL with potentially more regions available for
2711     // claiming and why we have to check out_of_regions() to determine
2712     // whether we're done or not.
2713     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2714       // We are going to try to claim a new region. We should have
2715       // given up on the previous one.
2716       // Separated the asserts so that we know which one fires.
2717       assert(_curr_region  == NULL, "invariant");
2718       assert(_finger       == NULL, "invariant");
2719       assert(_region_limit == NULL, "invariant");
2720       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2721       if (claimed_region != NULL) {
2722         // Yes, we managed to claim one
2723         setup_for_region(claimed_region);
2724         assert(_curr_region == claimed_region, "invariant");
2725       }
2726       // It is important to call the regular clock here. It might take
2727       // a while to claim a region if, for example, we hit a large
2728       // block of empty regions. So we need to call the regular clock
2729       // method once round the loop to make sure it's called
2730       // frequently enough.
2731       abort_marking_if_regular_check_fail();
2732     }
2733 
2734     if (!has_aborted() && _curr_region == NULL) {
2735       assert(_cm->out_of_regions(),
2736              "at this point we should be out of regions");
2737     }
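    // Loop again if we are still holding a (newly claimed) region and have not aborted.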
2738   } while (_curr_region != NULL && !has_aborted());
2739 
2740   if (!has_aborted()) {
2741     // We cannot check whether the global stack is empty, since other
2742     // tasks might be pushing objects to it concurrently.
2743     assert(_cm->out_of_regions(),
2744            "at this point we should be out of regions");
2745     // Try to reduce the number of available SATB buffers so that
2746     // remark has less work to do.
2747     drain_satb_buffers();
2748   }
2749 
2750   // Since we've done everything else, we can now totally drain the
2751   // local queue and global stack.
2752   drain_local_queue(false);
2753   drain_global_stack(false);
2754 
2755   // Attempt to steal work from other tasks' queues.
2756   if (do_stealing && !has_aborted()) {
2757     // We have not aborted. This means that we have finished all that
2758     // we could. Let's try to do some stealing...
2759 
2760     // We cannot check whether the global stack is empty, since other
2761     // tasks might be pushing objects to it concurrently.
2762     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2763            "only way to reach here");
2764     while (!has_aborted()) {
2765       G1TaskQueueEntry entry;
2766       if (_cm->try_stealing(_worker_id, entry)) {
2767         scan_task_entry(entry);
2768 
2769         // And since we're towards the end, let's totally drain the
2770         // local queue and global stack.
2771         drain_local_queue(false);
2772         drain_global_stack(false);
2773       } else {
2774         break;
2775       }
2776     }
2777   }
2778 
2779   // We still haven't aborted. Now, let's try to get into the
2780   // termination protocol.
2781   if (do_termination && !has_aborted()) {
2782     // We cannot check whether the global stack is empty, since other
2783     // tasks might be concurrently pushing objects on it.
2784     // Separated the asserts so that we know which one fires.
2785     assert(_cm->out_of_regions(), "only way to reach here");
2786     assert(_task_queue->size() == 0, "only way to reach here");
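    // Termination times are measured with os::elapsedVTime(), i.e. the current
    // thread's virtual (CPU) time in seconds, scaled here to milliseconds.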
2787     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2788 
2789     // The G1CMTask class also extends the TerminatorTerminator class,
2790     // hence its should_exit_termination() method will also decide
2791     // whether to exit the termination protocol or not.
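    // In the serial case there are no other tasks to synchronize with, so we can
    // consider termination reached immediately.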
2792     bool finished = (is_serial ||
2793                      _cm->terminator()->offer_termination(this));
2794     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2795     _termination_time_ms +=
2796       termination_end_time_ms - _termination_start_time_ms;
2797 
2798     if (finished) {
2799       // We're all done.
2800 
2801       // We can now guarantee that the global stack is empty, since
2802       // all other tasks have finished. We separated the guarantees so
2803       // that, if a condition is false, we can immediately find out
2804       // which one.
2805       guarantee(_cm->out_of_regions(), "only way to reach here");
2806       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2807       guarantee(_task_queue->size() == 0, "only way to reach here");
2808       guarantee(!_cm->has_overflown(), "only way to reach here");
2809       guarantee(!has_aborted(), "should never happen if termination has completed");
2810     } else {
2811       // Apparently there's more work to do. Let's abort this task; it will be
2812       // restarted and we can hopefully find more things to do.
2813       set_has_aborted();
2814     }
2815   }
2816 
2817   // Mainly for debugging purposes to make sure that a pointer to the
2818   // closure which was statically allocated in this frame doesn't
2819   // escape it by accident.
2820   set_cm_oop_closure(NULL);
2821   double end_time_ms = os::elapsedVTime() * 1000.0;
2822   double elapsed_time_ms = end_time_ms - _start_time_ms;
2823   // Update the step history.
2824   _step_times_ms.add(elapsed_time_ms);
2825 
2826   if (has_aborted()) {
2827     // The task was aborted for some reason.
2828     if (_has_timed_out) {
2829       double diff_ms = elapsed_time_ms - _time_target_ms;
2830       // Keep statistics of how well we did with respect to hitting
2831       // our target only if we actually timed out (if we aborted for
2832       // other reasons, then the results might get skewed).
2833       _marking_step_diff_ms.add(diff_ms);
2834     }
2835 
2836     if (_cm->has_overflown()) {
2837       // This is the interesting one. We aborted because a global
2838       // overflow was raised. This means we have to restart the
2839       // marking phase and start iterating over regions. However, in
2840       // order to do this we have to make sure that all tasks stop
2841       // what they are doing and re-initialize in a safe manner. We
2842       // will achieve this with the use of two barrier sync points.
2843 
2844       if (!is_serial) {
2845         // We only need to enter the sync barrier if we are being called
2846         // from a parallel context.
2847         _cm->enter_first_sync_barrier(_worker_id);
2848 
2849         // When we exit this sync barrier we know that all tasks have
2850         // stopped doing marking work. So, it's now safe to
2851         // re-initialize our data structures.
2852       }
2853 
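      // Drop the fields tracking the current region and flush the per-region
      // liveness counts this task has cached back to the shared marking statistics.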
2854       clear_region_fields();
2855       flush_mark_stats_cache();
2856 
2857       if (!is_serial) {
2858         // If we're executing the concurrent phase of marking, reset the marking
2859         // state; otherwise the marking state is reset after reference processing,
2860         // during the remark pause.
2861         // If we reset here as a result of an overflow during the remark we will
2862         // see assertion failures from any subsequent set_concurrency_and_phase()
2863         // calls.
2864         if (_cm->concurrent() && _worker_id == 0) {
2865           // Worker 0 is responsible for clearing the global data structures because
2866           // of an overflow. During STW we should not clear the overflow flag (in
2867         // G1ConcurrentMark::reset_marking_for_restart()) since we rely on it being true
2868         // when we exit this method to abort the pause and restart concurrent marking.
2869           _cm->reset_marking_for_restart();
2870 
2871           log_info(gc, marking)("Concurrent Mark reset for overflow");
2872         }
2873 
2874         // ...and enter the second barrier.
2875         _cm->enter_second_sync_barrier(_worker_id);
2876       }
2877       // At this point, if we're in the concurrent phase of
2878       // marking, everything has been re-initialized and we're
2879       // ready to restart.
2880     }
2881   }
2882 }
2883 
2884 G1CMTask::G1CMTask(uint worker_id,
2885                    G1ConcurrentMark* cm,
2886                    G1CMTaskQueue* task_queue,
2887                    G1RegionMarkStats* mark_stats,
2888                    uint max_regions) :
2889   _objArray_processor(this),
2890   _worker_id(worker_id),
2891   _g1h(G1CollectedHeap::heap()),
2892   _cm(cm),
2893   _next_mark_bitmap(NULL),
2894   _task_queue(task_queue),
2895   _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize),
2896   _calls(0),
2897   _time_target_ms(0.0),
2898   _start_time_ms(0.0),
2899   _cm_oop_closure(NULL),
2900   _curr_region(NULL),
2901   _finger(NULL),
2902   _region_limit(NULL),
2903   _words_scanned(0),
2904   _words_scanned_limit(0),
2905   _real_words_scanned_limit(0),
2906   _refs_reached(0),
2907   _refs_reached_limit(0),
2908   _real_refs_reached_limit(0),
2909   _has_aborted(false),
2910   _has_timed_out(false),
2911   _draining_satb_buffers(false),
2912   _step_times_ms(),
2913   _elapsed_time_ms(0.0),
2914   _termination_time_ms(0.0),
2915   _termination_start_time_ms(0.0),
2916   _marking_step_diff_ms()
2917 {
2918   guarantee(task_queue != NULL, "invariant");
2919 
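  // Seed the step-time diff statistics with a small sample (0.5 ms), presumably so
  // that early target-time adjustments do not work off an empty sequence.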
2920   _marking_step_diff_ms.add(0.5);
2921 }
2922 
2923 // These are formatting macros that are used below to ensure
2924 // consistent formatting. The *_H_* versions are used to format the
2925 // header for a particular value and they should be kept consistent
2926 // with the corresponding macro. Also note that most of the macros add
2927 // the necessary white space (as a prefix) which makes them a bit
2928 // easier to compose.
2929 
2930 // All the output lines are prefixed with this string to be able to
2931 // identify them easily in a large log file.
2932 #define G1PPRL_LINE_PREFIX            "###"
2933 
2934 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2935 #ifdef _LP64
2936 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2937 #else // _LP64
2938 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2939 #endif // _LP64
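// The header width matches the printed width of G1PPRL_ADDR_BASE_FORMAT: two
// PTR_FORMAT values separated by '-', i.e. 37 characters on 64-bit and 21 on 32-bit.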
2940 
2941 // For per-region info
2942 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2943 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2944 #define G1PPRL_STATE_FORMAT           "   %-5s"
2945 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2946 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
2947 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2948 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
2949 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
2950 
2951 // For summary info
2952 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2953 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
2954 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2955 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
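// For example, G1PPRL_SUM_MB_PERC_FORMAT("used") expands to "  used: %1.2f MB / %1.2f %%".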
2956 
2957 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2958   _total_used_bytes(0), _total_capacity_bytes(0),
2959   _total_prev_live_bytes(0), _total_next_live_bytes(0),
2960   _total_remset_bytes(0), _total_strong_code_roots_bytes(0)
2961 {
2962   if (!log_is_enabled(Trace, gc, liveness)) {
2963     return;
2964   }
2965 
2966   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2967   MemRegion g1_reserved = g1h->g1_reserved();
2968   double now = os::elapsedTime();
2969 
2970   // Print the header of the output.
2971   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
2972   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
2973                           G1PPRL_SUM_ADDR_FORMAT("reserved")
2974                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
2975                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
2976                           HeapRegion::GrainBytes);
2977   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2978   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2979                           G1PPRL_TYPE_H_FORMAT
2980                           G1PPRL_ADDR_BASE_H_FORMAT
2981                           G1PPRL_BYTE_H_FORMAT
2982                           G1PPRL_BYTE_H_FORMAT
2983                           G1PPRL_BYTE_H_FORMAT
2984                           G1PPRL_DOUBLE_H_FORMAT
2985                           G1PPRL_BYTE_H_FORMAT
2986                           G1PPRL_STATE_H_FORMAT
2987                           G1PPRL_BYTE_H_FORMAT,
2988                           "type", "address-range",
2989                           "used", "prev-live", "next-live", "gc-eff",
2990                           "remset", "state", "code-roots");
2991   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2992                           G1PPRL_TYPE_H_FORMAT
2993                           G1PPRL_ADDR_BASE_H_FORMAT
2994                           G1PPRL_BYTE_H_FORMAT
2995                           G1PPRL_BYTE_H_FORMAT
2996                           G1PPRL_BYTE_H_FORMAT
2997                           G1PPRL_DOUBLE_H_FORMAT
2998                           G1PPRL_BYTE_H_FORMAT
2999                           G1PPRL_STATE_H_FORMAT
3000                           G1PPRL_BYTE_H_FORMAT,
3001                           "", "",
3002                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3003                           "(bytes)", "", "(bytes)");
3004 }
3005 
3006 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) {
3007   if (!log_is_enabled(Trace, gc, liveness)) {
3008     return false;
3009   }
3010 
3011   const char* type       = r->get_type_str();
3012   HeapWord* bottom       = r->bottom();
3013   HeapWord* end          = r->end();
3014   size_t capacity_bytes  = r->capacity();
3015   size_t used_bytes      = r->used();
3016   size_t prev_live_bytes = r->live_bytes();
3017   size_t next_live_bytes = r->next_live_bytes();
3018   double gc_eff          = r->gc_efficiency();
3019   size_t remset_bytes    = r->rem_set()->mem_size();
3020   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3021   const char* remset_type = r->rem_set()->get_short_state_str();
3022 
3023   _total_used_bytes      += used_bytes;
3024   _total_capacity_bytes  += capacity_bytes;
3025   _total_prev_live_bytes += prev_live_bytes;
3026   _total_next_live_bytes += next_live_bytes;
3027   _total_remset_bytes    += remset_bytes;
3028   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3029 
3030   // Print a line for this particular region.
3031   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3032                           G1PPRL_TYPE_FORMAT
3033                           G1PPRL_ADDR_BASE_FORMAT
3034                           G1PPRL_BYTE_FORMAT
3035                           G1PPRL_BYTE_FORMAT
3036                           G1PPRL_BYTE_FORMAT
3037                           G1PPRL_DOUBLE_FORMAT
3038                           G1PPRL_BYTE_FORMAT
3039                           G1PPRL_STATE_FORMAT
3040                           G1PPRL_BYTE_FORMAT,
3041                           type, p2i(bottom), p2i(end),
3042                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3043                           remset_bytes, remset_type, strong_code_roots_bytes);
3044 
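  // Returning false tells the heap region iteration to continue with the next region.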
3045   return false;
3046 }
3047 
3048 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3049   if (!log_is_enabled(Trace, gc, liveness)) {
3050     return;
3051   }
3052 
3053   // Add the static memory usage to the remembered set sizes.
3054   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3055   // Print the footer of the output.
3056   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3057   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3058                          " SUMMARY"
3059                          G1PPRL_SUM_MB_FORMAT("capacity")
3060                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3061                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3062                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3063                          G1PPRL_SUM_MB_FORMAT("remset")
3064                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3065                          bytes_to_mb(_total_capacity_bytes),
3066                          bytes_to_mb(_total_used_bytes),
3067                          percent_of(_total_used_bytes, _total_capacity_bytes),
3068                          bytes_to_mb(_total_prev_live_bytes),
3069                          percent_of(_total_prev_live_bytes, _total_capacity_bytes),
3070                          bytes_to_mb(_total_next_live_bytes),
3071                          percent_of(_total_next_live_bytes, _total_capacity_bytes),
3072                          bytes_to_mb(_total_remset_bytes),
3073                          bytes_to_mb(_total_strong_code_roots_bytes));
3074 }