1 /*
   2  * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorState.hpp"
  32 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  33 #include "gc/g1/g1HeapVerifier.hpp"
  34 #include "gc/g1/g1OopClosures.inline.hpp"
  35 #include "gc/g1/g1CardLiveData.inline.hpp"
  36 #include "gc/g1/g1Policy.hpp"
  37 #include "gc/g1/g1StringDedup.hpp"
  38 #include "gc/g1/heapRegion.inline.hpp"
  39 #include "gc/g1/heapRegionRemSet.hpp"
  40 #include "gc/g1/heapRegionSet.inline.hpp"
  41 #include "gc/g1/suspendibleThreadSet.hpp"
  42 #include "gc/shared/gcId.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.inline.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "logging/log.hpp"
  52 #include "memory/allocation.hpp"
  53 #include "memory/resourceArea.hpp"
  54 #include "oops/oop.inline.hpp"
  55 #include "runtime/atomic.hpp"
  56 #include "runtime/handles.inline.hpp"
  57 #include "runtime/java.hpp"
  58 #include "runtime/prefetch.inline.hpp"
  59 #include "services/memTracker.hpp"
  60 #include "utilities/align.hpp"
  61 #include "utilities/growableArray.hpp"
  62 
  63 // Concurrent marking bit map wrapper
  64 
  65 G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  66   _bm(),
  67   _shifter(shifter) {
  68   _bmStartWord = 0;
  69   _bmWordSize = 0;
  70 }
  71 
  72 HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  73                                                  const HeapWord* limit) const {
  74   // First we must round addr *up* to a possible object boundary.
  75   addr = align_up(addr, HeapWordSize << _shifter);
  76   size_t addrOffset = heapWordToOffset(addr);
  77   assert(limit != NULL, "limit must not be NULL");
  78   size_t limitOffset = heapWordToOffset(limit);
  79   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  80   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  81   assert(nextAddr >= addr, "get_next_one postcondition");
  82   assert(nextAddr == limit || isMarked(nextAddr),
  83          "get_next_one postcondition");
  84   return nextAddr;
  85 }
  86 
  87 #ifndef PRODUCT
  88 bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  89   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  90   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  91          "size inconsistency");
  92   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  93          _bmWordSize  == heap_rs.word_size();
  94 }
  95 #endif
  96 
  97 void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  98   _bm.print_on_error(st, prefix);
  99 }
 100 
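     // Sizing helpers: the mark bitmap uses one bit per MinObjAlignmentInBytes of
     // heap, so each bitmap byte covers mark_distance() heap bytes and the bitmap
     // itself needs heap_size / mark_distance() bytes, rounded up to the
     // allocation alignment by compute_size().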
 101 size_t G1CMBitMap::compute_size(size_t heap_size) {
 102   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 103 }
 104 
 105 size_t G1CMBitMap::mark_distance() {
 106   return MinObjAlignmentInBytes * BitsPerByte;
 107 }
 108 
 109 void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 110   _bmStartWord = heap.start();
 111   _bmWordSize = heap.word_size();
 112 
 113   _bm = BitMapView((BitMap::bm_word_t*) storage->reserved().start(), _bmWordSize >> _shifter);
 114 
 115   storage->set_mapping_changed_listener(&_listener);
 116 }
 117 
 118 void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 119   if (zero_filled) {
 120     return;
 121   }
 122   // We need to clear the bitmap on commit, removing any existing information.
 123   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 124   _bm->clear_range(mr);
 125 }
 126 
 127 void G1CMBitMap::clear_range(MemRegion mr) {
 128   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 129   assert(!mr.is_empty(), "unexpected empty region");
 130   // convert address range into offset range
 131   _bm.at_put_range(heapWordToOffset(mr.start()),
 132                    heapWordToOffset(mr.end()), false);
 133 }
 134 
 135 G1CMMarkStack::G1CMMarkStack() :
 136   _max_chunk_capacity(0),
 137   _base(NULL),
 138   _chunk_capacity(0) {
 139   set_empty();
 140 }
 141 
 142 bool G1CMMarkStack::resize(size_t new_capacity) {
 143   assert(is_empty(), "Only resize when stack is empty.");
 144   assert(new_capacity <= _max_chunk_capacity,
 145          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
 146 
 147   TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);
 148 
 149   if (new_base == NULL) {
 150     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 151     return false;
 152   }
 153   // Release old mapping.
 154   if (_base != NULL) {
 155     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 156   }
 157 
 158   _base = new_base;
 159   _chunk_capacity = new_capacity;
 160   set_empty();
 161 
 162   return true;
 163 }
 164 
 165 size_t G1CMMarkStack::capacity_alignment() {
 166   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 167 }
 168 
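     // Set up the mark stack. Capacities are passed in as a number of
     // G1TaskQueueEntry slots and converted to a number of chunks, after
     // aligning them up to capacity_alignment().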
 169 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 170   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 171 
 172   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 173 
 174   _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 175   size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 176 
 177   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 178             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 179             _max_chunk_capacity,
 180             initial_chunk_capacity);
 181 
 182   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 183                 initial_chunk_capacity, _max_chunk_capacity);
 184 
 185   return resize(initial_chunk_capacity);
 186 }
 187 
 188 void G1CMMarkStack::expand() {
 189   if (_chunk_capacity == _max_chunk_capacity) {
 190     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 191     return;
 192   }
 193   size_t old_capacity = _chunk_capacity;
 194   // Double capacity if possible
 195   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 196 
 197   if (resize(new_capacity)) {
 198     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 199                   old_capacity, new_capacity);
 200   } else {
 201     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 202                     old_capacity, new_capacity);
 203   }
 204 }
 205 
 206 G1CMMarkStack::~G1CMMarkStack() {
 207   if (_base != NULL) {
 208     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 209   }
 210 }
 211 
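     // The raw list helpers below do no locking themselves; callers serialize
     // access via MarkStackChunkList_lock and MarkStackFreeList_lock.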
 212 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 213   elem->next = *list;
 214   *list = elem;
 215 }
 216 
 217 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 218   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 219   add_chunk_to_list(&_chunk_list, elem);
 220   _chunks_in_chunk_list++;
 221 }
 222 
 223 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 224   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 225   add_chunk_to_list(&_free_list, elem);
 226 }
 227 
 228 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 229   TaskQueueEntryChunk* result = *list;
 230   if (result != NULL) {
 231     *list = (*list)->next;
 232   }
 233   return result;
 234 }
 235 
 236 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 237   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 238   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 239   if (result != NULL) {
 240     _chunks_in_chunk_list--;
 241   }
 242   return result;
 243 }
 244 
 245 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 246   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 247   return remove_chunk_from_list(&_free_list);
 248 }
 249 
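     // Claim a previously unused chunk from the backing array by bumping the
     // high water mark (_hwm). Returns NULL once the backing memory is exhausted.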
 250 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
 251   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 252   // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 253   // wraparound of _hwm.
 254   if (_hwm >= _chunk_capacity) {
 255     return NULL;
 256   }
 257 
 258   size_t cur_idx = Atomic::add(1, &_hwm) - 1;
 259   if (cur_idx >= _chunk_capacity) {
 260     return NULL;
 261   }
 262 
 263   TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
 264   result->next = NULL;
 265   return result;
 266 }
 267 
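     // Push one chunk's worth (EntriesPerChunk entries) of ptr_arr onto the
     // overflow stack: reuse a chunk from the free list if possible, otherwise
     // carve a new one out of the backing array, then publish it on the chunk list.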
 268 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 269   // Get a new chunk.
 270   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 271 
 272   if (new_chunk == NULL) {
 273     // Did not get a chunk from the free list. Allocate from backing memory.
 274     new_chunk = allocate_new_chunk();
 275 
 276     if (new_chunk == NULL) {
 277       return false;
 278     }
 279   }
 280 
 281   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 282 
 283   add_chunk_to_chunk_list(new_chunk);
 284 
 285   return true;
 286 }
 287 
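     // Pop one chunk's worth of entries into ptr_arr and recycle the emptied
     // chunk onto the free list. Returns false if the chunk list is empty.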
 288 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 289   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 290 
 291   if (cur == NULL) {
 292     return false;
 293   }
 294 
 295   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 296 
 297   add_chunk_to_free_list(cur);
 298   return true;
 299 }
 300 
 301 void G1CMMarkStack::set_empty() {
 302   _chunks_in_chunk_list = 0;
 303   _hwm = 0;
 304   _chunk_list = NULL;
 305   _free_list = NULL;
 306 }
 307 
 308 G1CMRootRegions::G1CMRootRegions() :
 309   _cm(NULL), _scan_in_progress(false),
 310   _should_abort(false), _claimed_survivor_index(0) { }
 311 
 312 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 313   _survivors = survivors;
 314   _cm = cm;
 315 }
 316 
 317 void G1CMRootRegions::prepare_for_scan() {
 318   assert(!scan_in_progress(), "pre-condition");
 319 
 320   // Currently, only survivors can be root regions.
 321   _claimed_survivor_index = 0;
 322   _scan_in_progress = _survivors->regions()->is_nonempty();
 323   _should_abort = false;
 324 }
 325 
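     // Hand out root regions (currently only survivors) one at a time by
     // atomically bumping _claimed_survivor_index; returns NULL when every
     // region has been claimed or the scan has been aborted.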
 326 HeapRegion* G1CMRootRegions::claim_next() {
 327   if (_should_abort) {
 328     // If someone has set the should_abort flag, we return NULL to
 329     // force the caller to bail out of their loop.
 330     return NULL;
 331   }
 332 
 333   // Currently, only survivors can be root regions.
 334   const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
 335 
 336   int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
 337   if (claimed_index < survivor_regions->length()) {
 338     return survivor_regions->at(claimed_index);
 339   }
 340   return NULL;
 341 }
 342 
 343 uint G1CMRootRegions::num_root_regions() const {
 344   return (uint)_survivors->regions()->length();
 345 }
 346 
 347 void G1CMRootRegions::notify_scan_done() {
 348   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 349   _scan_in_progress = false;
 350   RootRegionScan_lock->notify_all();
 351 }
 352 
 353 void G1CMRootRegions::cancel_scan() {
 354   notify_scan_done();
 355 }
 356 
 357 void G1CMRootRegions::scan_finished() {
 358   assert(scan_in_progress(), "pre-condition");
 359 
 360   // Currently, only survivors can be root regions.
 361   if (!_should_abort) {
 362     assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
 363     assert((uint)_claimed_survivor_index >= _survivors->length(),
 364            "we should have claimed all survivors, claimed index = %u, length = %u",
 365            (uint)_claimed_survivor_index, _survivors->length());
 366   }
 367 
 368   notify_scan_done();
 369 }
 370 
 371 bool G1CMRootRegions::wait_until_scan_finished() {
 372   if (!scan_in_progress()) return false;
 373 
 374   {
 375     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 376     while (scan_in_progress()) {
 377       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 378     }
 379   }
 380   return true;
 381 }
 382 
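     // Use roughly one concurrent marking thread for every four parallel GC
     // threads, but always at least one.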
 383 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 384   return MAX2((n_par_threads + 2) / 4, 1U);
 385 }
 386 
 387 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 388   _g1h(g1h),
 389   _markBitMap1(),
 390   _markBitMap2(),
 391   _parallel_marking_threads(0),
 392   _max_parallel_marking_threads(0),
 393   _sleep_factor(0.0),
 394   _marking_task_overhead(1.0),
 395   _cleanup_list("Cleanup List"),
 396 
 397   _prevMarkBitMap(&_markBitMap1),
 398   _nextMarkBitMap(&_markBitMap2),
 399 
 400   _global_mark_stack(),
 401   // _finger set in set_non_marking_state
 402 
 403   _max_worker_id(ParallelGCThreads),
 404   // _active_tasks set in set_non_marking_state
 405   // _tasks set inside the constructor
 406   _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
 407   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 408 
 409   _has_overflown(false),
 410   _concurrent(false),
 411   _has_aborted(false),
 412   _restart_for_overflow(false),
 413   _concurrent_marking_in_progress(false),
 414   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 415   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 416 
 417   // _verbose_level set below
 418 
 419   _init_times(),
 420   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 421   _cleanup_times(),
 422   _total_counting_time(0.0),
 423   _total_rs_scrub_time(0.0),
 424 
 425   _parallel_workers(NULL),
 426 
 427   _completed_initialization(false) {
 428 
 429   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 430   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 431 
 432   // Create & start a ConcurrentMark thread.
 433   _cmThread = new ConcurrentMarkThread(this);
 434   assert(cmThread() != NULL, "CM Thread should have been created");
 435   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 436   if (_cmThread->osthread() == NULL) {
 437     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 438   }
 439 
 440   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 441   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 442   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 443 
 444   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 445   satb_qs.set_buffer_size(G1SATBBufferSize);
 446 
 447   _root_regions.init(_g1h->survivor(), this);
 448 
 449   if (ConcGCThreads > ParallelGCThreads) {
 450     log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
 451                     ConcGCThreads, ParallelGCThreads);
 452     return;
 453   }
 454   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 455     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 456     // if both are set
 457     _sleep_factor             = 0.0;
 458     _marking_task_overhead    = 1.0;
 459   } else if (G1MarkingOverheadPercent > 0) {
 460     // We will calculate the number of parallel marking threads based
 461     // on a target overhead with respect to the soft real-time goal
 462     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 463     double overall_cm_overhead =
 464       (double) MaxGCPauseMillis * marking_overhead /
 465       (double) GCPauseIntervalMillis;
 466     double cpu_ratio = 1.0 / os::initial_active_processor_count();
 467     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 468     double marking_task_overhead =
 469       overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
 470     double sleep_factor =
 471                        (1.0 - marking_task_overhead) / marking_task_overhead;
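         // For example (illustrative values): with G1MarkingOverheadPercent=10,
         // MaxGCPauseMillis=200 and GCPauseIntervalMillis=1000, overall_cm_overhead
         // is 0.02 (2% of total CPU time). On 8 CPUs cpu_ratio is 0.125, so a
         // single marking thread is used with a task overhead of 0.16 and a
         // sleep factor of 5.25, i.e. it sleeps about five times as long as it marks.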
 472 
 473     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 474     _sleep_factor             = sleep_factor;
 475     _marking_task_overhead    = marking_task_overhead;
 476   } else {
 477     // Calculate the number of parallel marking threads by scaling
 478     // the number of parallel GC threads.
 479     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 480     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 481     _sleep_factor             = 0.0;
 482     _marking_task_overhead    = 1.0;
 483   }
 484 
 485   assert(ConcGCThreads > 0, "Should have been set");
 486   log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
 487   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 488   _parallel_marking_threads = ConcGCThreads;
 489   _max_parallel_marking_threads = _parallel_marking_threads;
 490 
 491   _parallel_workers = new WorkGang("G1 Marker",
 492        _max_parallel_marking_threads, false, true);
 493   if (_parallel_workers == NULL) {
 494     vm_exit_during_initialization("Failed necessary allocation.");
 495   } else {
 496     _parallel_workers->initialize_workers();
 497   }
 498 
 499   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 500     size_t mark_stack_size =
 501       MIN2(MarkStackSizeMax,
 502           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 503     // Verify that the calculated value for MarkStackSize is in range.
 504     // It would be nice to use the private utility routine from Arguments.
 505     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 506       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 507                       "must be between 1 and " SIZE_FORMAT,
 508                       mark_stack_size, MarkStackSizeMax);
 509       return;
 510     }
 511     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 512   } else {
 513     // Verify MarkStackSize is in range.
 514     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 515       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 516         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 517           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 518                           "must be between 1 and " SIZE_FORMAT,
 519                           MarkStackSize, MarkStackSizeMax);
 520           return;
 521         }
 522       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 523         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 524           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 525                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 526                           MarkStackSize, MarkStackSizeMax);
 527           return;
 528         }
 529       }
 530     }
 531   }
 532 
 533   if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
 534     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 535   }
 536 
 537   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
 538   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 539 
 540   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 541   _active_tasks = _max_worker_id;
 542 
 543   for (uint i = 0; i < _max_worker_id; ++i) {
 544     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 545     task_queue->initialize();
 546     _task_queues->register_queue(i, task_queue);
 547 
 548     _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);
 549 
 550     _accum_task_vtime[i] = 0.0;
 551   }
 552 
 553   // so that the call below can read a sensible value
 554   _heap_start = g1h->reserved_region().start();
 555   set_non_marking_state();
 556   _completed_initialization = true;
 557 }
 558 
 559 void G1ConcurrentMark::reset() {
 560   // Starting values for these two. This should be called in a STW
 561   // phase.
 562   MemRegion reserved = _g1h->g1_reserved();
 563   _heap_start = reserved.start();
 564   _heap_end   = reserved.end();
 565 
 566   // Separated the asserts so that we know which one fires.
 567   assert(_heap_start != NULL, "heap bounds should look ok");
 568   assert(_heap_end != NULL, "heap bounds should look ok");
 569   assert(_heap_start < _heap_end, "heap bounds should look ok");
 570 
 571   // Reset all the marking data structures and any necessary flags
 572   reset_marking_state();
 573 
 574   // We do reset all of them, since different phases will use
 575   // different numbers of active threads. So, it's easiest to have all
 576   // of them ready.
 577   for (uint i = 0; i < _max_worker_id; ++i) {
 578     _tasks[i]->reset(_nextMarkBitMap);
 579   }
 580 
 581   // we need this to make sure that the flag is on during the evac
 582   // pause with the initial mark piggy-backed on it
 583   set_concurrent_marking_in_progress();
 584 }
 585 
 586 
 587 void G1ConcurrentMark::reset_marking_state() {
 588   _global_mark_stack.set_empty();
 589 
 590   // Expand the marking stack, if we have to and if we can.
 591   if (has_overflown()) {
 592     _global_mark_stack.expand();
 593   }
 594 
 595   clear_has_overflown();
 596   _finger = _heap_start;
 597 
 598   for (uint i = 0; i < _max_worker_id; ++i) {
 599     G1CMTaskQueue* queue = _task_queues->queue(i);
 600     queue->set_empty();
 601   }
 602 }
 603 
 604 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 605   assert(active_tasks <= _max_worker_id, "we should not have more");
 606 
 607   _active_tasks = active_tasks;
 608   // Need to update the three data structures below according to the
 609   // number of active threads for this phase.
 610   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 611   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 612   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 613 }
 614 
 615 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 616   set_concurrency(active_tasks);
 617 
 618   _concurrent = concurrent;
 619   // We propagate this to all tasks, not just the active ones.
 620   for (uint i = 0; i < _max_worker_id; ++i)
 621     _tasks[i]->set_concurrent(concurrent);
 622 
 623   if (concurrent) {
 624     set_concurrent_marking_in_progress();
 625   } else {
 626     // We currently assume that the concurrent flag has been set to
 627     // false before we start remark. At this point we should also be
 628     // in a STW phase.
 629     assert(!concurrent_marking_in_progress(), "invariant");
 630     assert(out_of_regions(),
 631            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 632            p2i(_finger), p2i(_heap_end));
 633   }
 634 }
 635 
 636 void G1ConcurrentMark::set_non_marking_state() {
 637   // We set the global marking state to some default values when we're
 638   // not doing marking.
 639   reset_marking_state();
 640   _active_tasks = 0;
 641   clear_concurrent_marking_in_progress();
 642 }
 643 
 644 G1ConcurrentMark::~G1ConcurrentMark() {
 645   // The G1ConcurrentMark instance is never freed.
 646   ShouldNotReachHere();
 647 }
 648 
 649 class G1ClearBitMapTask : public AbstractGangTask {
 650 public:
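       // Bitmap clearing is split into chunks of this many bytes so that a
       // suspendible clearing task can yield to safepoints between chunks.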
 651   static size_t chunk_size() { return M; }
 652 
 653 private:
 654   // Heap region closure used for clearing the given mark bitmap.
 655   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 656   private:
 657     G1CMBitMap* _bitmap;
 658     G1ConcurrentMark* _cm;
 659   public:
 660     G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
 661     }
 662 
 663     virtual bool doHeapRegion(HeapRegion* r) {
 664       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 665 
 666       HeapWord* cur = r->bottom();
 667       HeapWord* const end = r->end();
 668 
 669       while (cur < end) {
 670         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 671         _bitmap->clear_range(mr);
 672 
 673         cur += chunk_size_in_words;
 674 
 675         // Abort iteration if after yielding the marking has been aborted.
 676         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 677           return true;
 678         }
 679         // Repeat the asserts from before the start of the closure. We will do them
 680         // as asserts here to minimize their overhead on the product. However, we
 681         // will have them as guarantees at the beginning / end of the bitmap
 682         // clearing to get some checking in the product.
 683         assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
 684         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 685       }
 686       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 687 
 688       return false;
 689     }
 690   };
 691 
 692   G1ClearBitmapHRClosure _cl;
 693   HeapRegionClaimer _hr_claimer;
 694   bool _suspendible; // If the task is suspendible, workers must join the STS.
 695 
 696 public:
 697   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 698     AbstractGangTask("G1 Clear Bitmap"),
 699     _cl(bitmap, suspendible ? cm : NULL),
 700     _hr_claimer(n_workers),
 701     _suspendible(suspendible)
 702   { }
 703 
 704   void work(uint worker_id) {
 705     SuspendibleThreadSetJoiner sts_join(_suspendible);
 706     G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
 707   }
 708 
 709   bool is_complete() {
 710     return _cl.complete();
 711   }
 712 };
 713 
 714 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 715   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 716 
 717   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 718   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 719 
 720   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 721 
 722   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 723 
 724   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 725   workers->run_task(&cl, num_workers);
 726   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 727 }
 728 
 729 void G1ConcurrentMark::cleanup_for_next_mark() {
 730   // Make sure that the concurrent mark thread still appears to be in
 731   // the current cycle.
 732   guarantee(cmThread()->during_cycle(), "invariant");
 733 
 734   // We are finishing up the current cycle by clearing the next
 735   // marking bitmap and getting it ready for the next cycle. During
 736   // this time no other cycle can start. So, let's make sure that this
 737   // is the case.
 738   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 739 
 740   clear_bitmap(_nextMarkBitMap, _parallel_workers, true);
 741 
 742   // Clear the live count data. If the marking has been aborted, the abort()
 743   // call already did that.
 744   if (!has_aborted()) {
 745     clear_live_data(_parallel_workers);
 746     DEBUG_ONLY(verify_live_data_clear());
 747   }
 748 
 749   // Repeat the asserts from above.
 750   guarantee(cmThread()->during_cycle(), "invariant");
 751   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 752 }
 753 
 754 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 755   assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
 756   clear_bitmap((G1CMBitMap*)_prevMarkBitMap, workers, false);
 757 }
 758 
 759 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 760   G1CMBitMap* _bitmap;
 761   bool _error;
 762  public:
 763   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 764   }
 765 
 766   virtual bool doHeapRegion(HeapRegion* r) {
 767     // This closure can be called concurrently to the mutator, so we must make sure
 768     // that the result of the getNextMarkedWordAddress() call is compared to the
 769     // value passed to it as limit to detect any found bits.
 770     // end never changes in G1.
 771     HeapWord* end = r->end();
 772     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 773   }
 774 };
 775 
 776 bool G1ConcurrentMark::nextMarkBitmapIsClear() {
 777   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 778   _g1h->heap_region_iterate(&cl);
 779   return cl.complete();
 780 }
 781 
 782 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 783 public:
 784   bool doHeapRegion(HeapRegion* r) {
 785     r->note_start_of_marking();
 786     return false;
 787   }
 788 };
 789 
 790 void G1ConcurrentMark::checkpointRootsInitialPre() {
 791   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 792   G1Policy* g1p = g1h->g1_policy();
 793 
 794   _has_aborted = false;
 795 
 796   // Initialize marking structures. This has to be done in a STW phase.
 797   reset();
 798 
 799   // For each region note start of marking.
 800   NoteStartOfMarkHRClosure startcl;
 801   g1h->heap_region_iterate(&startcl);
 802 }
 803 
 804 
 805 void G1ConcurrentMark::checkpointRootsInitialPost() {
 806   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 807 
 808   // Start Concurrent Marking weak-reference discovery.
 809   ReferenceProcessor* rp = g1h->ref_processor_cm();
 810   // enable ("weak") refs discovery
 811   rp->enable_discovery();
 812   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 813 
 814   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 815   // This is the start of the marking cycle; we expect all
 816   // threads to have SATB queues with active set to false.
 817   satb_mq_set.set_active_all_threads(true, /* new active value */
 818                                      false /* expected_active */);
 819 
 820   _root_regions.prepare_for_scan();
 821 
 822   // update_g1_committed() will be called at the end of an evac pause
 823   // when marking is on. So, it's also called at the end of the
 824   // initial-mark pause to update the heap end, if the heap expands
 825   // during it. No need to call it here.
 826 }
 827 
 828 /*
 829  * Notice that in the next two methods, we actually leave the STS
 830  * during the barrier sync and join it immediately afterwards. If we
 831  * do not do this, the following deadlock can occur: one thread could
 832  * be in the barrier sync code, waiting for the other thread to also
 833  * sync up, whereas another one could be trying to yield, while also
 834  * waiting for the other threads to sync up too.
 835  *
 836  * Note, however, that this code is also used during remark and in
 837  * this case we should not attempt to leave / enter the STS, otherwise
 838  * we'll either hit an assert (debug / fastdebug) or deadlock
 839  * (product). So we should only leave / enter the STS if we are
 840  * operating concurrently.
 841  *
 842  * Because the thread that does the sync barrier has left the STS, it
 843  * is possible for it to be suspended while a Full GC or an evacuation
 844  * pause occurs. This is actually safe, since entering the sync
 845  * barrier is one of the last things do_marking_step() does, and it
 846  * doesn't manipulate any data structures afterwards.
 847  */
 848 
 849 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 850   bool barrier_aborted;
 851   {
 852     SuspendibleThreadSetLeaver sts_leave(concurrent());
 853     barrier_aborted = !_first_overflow_barrier_sync.enter();
 854   }
 855 
 856   // at this point everyone should have synced up and not be doing any
 857   // more work
 858 
 859   if (barrier_aborted) {
 860     // If the barrier aborted we ignore the overflow condition and
 861     // just abort the whole marking phase as quickly as possible.
 862     return;
 863   }
 864 
 865   // If we're executing the concurrent phase of marking, reset the marking
 866   // state; otherwise the marking state is reset after reference processing,
 867   // during the remark pause.
 868   // If we reset here as a result of an overflow during the remark we will
 869   // see assertion failures from any subsequent set_concurrency_and_phase()
 870   // calls.
 871   if (concurrent()) {
 872     // let the task associated with worker 0 do this
 873     if (worker_id == 0) {
 874       // task 0 is responsible for clearing the global data structures
 875       // We should be here because of an overflow. During STW we should
 876       // not clear the overflow flag since we rely on it being true when
 877       // we exit this method to abort the pause and restart concurrent
 878       // marking.
 879       reset_marking_state();
 880 
 881       log_info(gc, marking)("Concurrent Mark reset for overflow");
 882     }
 883   }
 884 
 885   // after this, each task should reset its own data structures and
 886   // then go into the second barrier
 887 }
 888 
 889 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 890   SuspendibleThreadSetLeaver sts_leave(concurrent());
 891   _second_overflow_barrier_sync.enter();
 892 
 893   // at this point everything should be re-initialized and ready to go
 894 }
 895 
 896 class G1CMConcurrentMarkingTask: public AbstractGangTask {
 897 private:
 898   G1ConcurrentMark*     _cm;
 899   ConcurrentMarkThread* _cmt;
 900 
 901 public:
 902   void work(uint worker_id) {
 903     assert(Thread::current()->is_ConcurrentGC_thread(),
 904            "this should only be done by a conc GC thread");
 905     ResourceMark rm;
 906 
 907     double start_vtime = os::elapsedVTime();
 908 
 909     {
 910       SuspendibleThreadSetJoiner sts_join;
 911 
 912       assert(worker_id < _cm->active_tasks(), "invariant");
 913       G1CMTask* the_task = _cm->task(worker_id);
 914       the_task->record_start_time();
 915       if (!_cm->has_aborted()) {
 916         do {
 917           double start_vtime_sec = os::elapsedVTime();
 918           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 919 
 920           the_task->do_marking_step(mark_step_duration_ms,
 921                                     true  /* do_termination */,
 922                                     false /* is_serial*/);
 923 
 924           double end_vtime_sec = os::elapsedVTime();
 925           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 926           _cm->do_yield_check();
 927 
 928           jlong sleep_time_ms;
 929           if (!_cm->has_aborted() && the_task->has_aborted()) {
 930             sleep_time_ms =
 931               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 932             {
 933               SuspendibleThreadSetLeaver sts_leave;
 934               os::sleep(Thread::current(), sleep_time_ms, false);
 935             }
 936           }
 937         } while (!_cm->has_aborted() && the_task->has_aborted());
 938       }
 939       the_task->record_end_time();
 940       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 941     }
 942 
 943     double end_vtime = os::elapsedVTime();
 944     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 945   }
 946 
 947   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
 948                             ConcurrentMarkThread* cmt) :
 949       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 950 
 951   ~G1CMConcurrentMarkingTask() { }
 952 };
 953 
 954 // Calculates the number of active workers for a concurrent
 955 // phase.
 956 uint G1ConcurrentMark::calc_parallel_marking_threads() {
 957   uint n_conc_workers = 0;
 958   if (!UseDynamicNumberOfGCThreads ||
 959       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 960        !ForceDynamicNumberOfGCThreads)) {
 961     n_conc_workers = max_parallel_marking_threads();
 962   } else {
 963     n_conc_workers =
 964       AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
 965                                                       1, /* Minimum workers */
 966                                                       parallel_marking_threads(),
 967                                                       Threads::number_of_non_daemon_threads());
 968     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 969     // that scaling has already gone into "_max_parallel_marking_threads".
 970   }
 971   assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
 972          "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
 973          max_parallel_marking_threads(), n_conc_workers);
 974   return n_conc_workers;
 975 }
 976 
 977 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
 978   // Currently, only survivors can be root regions.
 979   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 980   G1RootRegionScanClosure cl(_g1h, this);
 981 
 982   const uintx interval = PrefetchScanIntervalInBytes;
 983   HeapWord* curr = hr->bottom();
 984   const HeapWord* end = hr->top();
 985   while (curr < end) {
 986     Prefetch::read(curr, interval);
 987     oop obj = oop(curr);
 988     int size = obj->oop_iterate_size(&cl);
 989     assert(size == obj->size(), "sanity");
 990     curr += size;
 991   }
 992 }
 993 
 994 class G1CMRootRegionScanTask : public AbstractGangTask {
 995 private:
 996   G1ConcurrentMark* _cm;
 997 
 998 public:
 999   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
1000     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
1001 
1002   void work(uint worker_id) {
1003     assert(Thread::current()->is_ConcurrentGC_thread(),
1004            "this should only be done by a conc GC thread");
1005 
1006     G1CMRootRegions* root_regions = _cm->root_regions();
1007     HeapRegion* hr = root_regions->claim_next();
1008     while (hr != NULL) {
1009       _cm->scanRootRegion(hr);
1010       hr = root_regions->claim_next();
1011     }
1012   }
1013 };
1014 
1015 void G1ConcurrentMark::scan_root_regions() {
1016   // scan_in_progress() will have been set to true only if there was
1017   // at least one root region to scan. So, if it's false, we
1018   // should not attempt to do any further work.
1019   if (root_regions()->scan_in_progress()) {
1020     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
1021 
1022     _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
1023                                      // We distribute work on a per-region basis, so starting
1024                                      // more threads than that is useless.
1025                                      root_regions()->num_root_regions());
1026     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1027            "Maximum number of marking threads exceeded");
1028 
1029     G1CMRootRegionScanTask task(this);
1030     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1031                         task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
1032     _parallel_workers->run_task(&task, _parallel_marking_threads);
1033 
1034     // It's possible that has_aborted() is true here without actually
1035     // aborting the survivor scan earlier. This is OK as it's
1036     // mainly used for sanity checking.
1037     root_regions()->scan_finished();
1038   }
1039 }
1040 
1041 void G1ConcurrentMark::concurrent_cycle_start() {
1042   _gc_timer_cm->register_gc_start();
1043 
1044   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1045 
1046   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1047 }
1048 
1049 void G1ConcurrentMark::concurrent_cycle_end() {
1050   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1051 
1052   if (has_aborted()) {
1053     _gc_tracer_cm->report_concurrent_mode_failure();
1054   }
1055 
1056   _gc_timer_cm->register_gc_end();
1057 
1058   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1059 }
1060 
1061 void G1ConcurrentMark::mark_from_roots() {
1062   // we might be tempted to assert that:
1063   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1064   //        "inconsistent argument?");
1065   // However that wouldn't be right, because it's possible that
1066   // a safepoint is indeed in progress as a younger generation
1067   // stop-the-world GC happens even as we mark in this generation.
1068 
1069   _restart_for_overflow = false;
1070 
1071   // _g1h has _n_par_threads
1072   _parallel_marking_threads = calc_parallel_marking_threads();
1073   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1074     "Maximum number of marking threads exceeded");
1075 
1076   uint active_workers = MAX2(1U, parallel_marking_threads());
1077   assert(active_workers > 0, "Should have been set");
1078 
1079   // Setting active workers is not guaranteed since fewer
1080   // worker threads may currently exist and more may not be
1081   // available.
1082   active_workers = _parallel_workers->update_active_workers(active_workers);
1083   log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());
1084 
1085   // Parallel task terminator is set in "set_concurrency_and_phase()"
1086   set_concurrency_and_phase(active_workers, true /* concurrent */);
1087 
1088   G1CMConcurrentMarkingTask markingTask(this, cmThread());
1089   _parallel_workers->run_task(&markingTask);
1090   print_stats();
1091 }
1092 
1093 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1094   // world is stopped at this checkpoint
1095   assert(SafepointSynchronize::is_at_safepoint(),
1096          "world should be stopped");
1097 
1098   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1099 
1100   // If a full collection has happened, we shouldn't do this.
1101   if (has_aborted()) {
1102     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1103     return;
1104   }
1105 
1106   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1107 
1108   if (VerifyDuringGC) {
1109     HandleMark hm;  // handle scope
1110     g1h->prepare_for_verify();
1111     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1112   }
1113   g1h->verifier()->check_bitmaps("Remark Start");
1114 
1115   G1Policy* g1p = g1h->g1_policy();
1116   g1p->record_concurrent_mark_remark_start();
1117 
1118   double start = os::elapsedTime();
1119 
1120   checkpointRootsFinalWork();
1121 
1122   double mark_work_end = os::elapsedTime();
1123 
1124   weakRefsWork(clear_all_soft_refs);
1125 
1126   if (has_overflown()) {
1127     // We overflowed.  Restart concurrent marking.
1128     _restart_for_overflow = true;
1129 
1130     // Verify the heap w.r.t. the previous marking bitmap.
1131     if (VerifyDuringGC) {
1132       HandleMark hm;  // handle scope
1133       g1h->prepare_for_verify();
1134       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1135     }
1136 
1137     // Clear the marking state because we will be restarting
1138     // marking due to overflowing the global mark stack.
1139     reset_marking_state();
1140   } else {
1141     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1142     // We're done with marking.
1143     // This is the end of the marking cycle; we expect all
1144     // threads to have SATB queues with active set to true.
1145     satb_mq_set.set_active_all_threads(false, /* new active value */
1146                                        true /* expected_active */);
1147 
1148     if (VerifyDuringGC) {
1149       HandleMark hm;  // handle scope
1150       g1h->prepare_for_verify();
1151       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1152     }
1153     g1h->verifier()->check_bitmaps("Remark End");
1154     assert(!restart_for_overflow(), "sanity");
1155     // Completely reset the marking state since marking completed
1156     set_non_marking_state();
1157   }
1158 
1159   // Statistics
1160   double now = os::elapsedTime();
1161   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1162   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1163   _remark_times.add((now - start) * 1000.0);
1164 
1165   g1p->record_concurrent_mark_remark_end();
1166 
1167   G1CMIsAliveClosure is_alive(g1h);
1168   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1169 }
1170 
1171 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1172   G1CollectedHeap* _g1;
1173   size_t _freed_bytes;
1174   FreeRegionList* _local_cleanup_list;
1175   uint _old_regions_removed;
1176   uint _humongous_regions_removed;
1177   HRRSCleanupTask* _hrrs_cleanup_task;
1178 
1179 public:
1180   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1181                              FreeRegionList* local_cleanup_list,
1182                              HRRSCleanupTask* hrrs_cleanup_task) :
1183     _g1(g1),
1184     _freed_bytes(0),
1185     _local_cleanup_list(local_cleanup_list),
1186     _old_regions_removed(0),
1187     _humongous_regions_removed(0),
1188     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1189 
1190   size_t freed_bytes() { return _freed_bytes; }
1191   const uint old_regions_removed() { return _old_regions_removed; }
1192   const uint humongous_regions_removed() { return _humongous_regions_removed; }
1193 
1194   bool doHeapRegion(HeapRegion *hr) {
1195     if (hr->is_archive()) {
1196       return false;
1197     }
1198     _g1->reset_gc_time_stamps(hr);
1199     hr->note_end_of_marking();
1200 
1201     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1202       _freed_bytes += hr->used();
1203       hr->set_containing_set(NULL);
1204       if (hr->is_humongous()) {
1205         _humongous_regions_removed++;
1206         _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
1207       } else {
1208         _old_regions_removed++;
1209         _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
1210       }
1211     } else {
1212       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1213     }
1214 
1215     return false;
1216   }
1217 };
1218 
1219 class G1ParNoteEndTask: public AbstractGangTask {
1220   friend class G1NoteEndOfConcMarkClosure;
1221 
1222 protected:
1223   G1CollectedHeap* _g1h;
1224   FreeRegionList* _cleanup_list;
1225   HeapRegionClaimer _hrclaimer;
1226 
1227 public:
1228   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1229       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1230   }
1231 
1232   void work(uint worker_id) {
1233     FreeRegionList local_cleanup_list("Local Cleanup List");
1234     HRRSCleanupTask hrrs_cleanup_task;
1235     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1236                                            &hrrs_cleanup_task);
1237     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1238     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1239 
1240     // Now update the lists
1241     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1242     {
1243       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1244       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1245 
1246       // If we iterate over the global cleanup list at the end of
1247       // cleanup to do this printing we will not guarantee to only
1248       // generate output for the newly-reclaimed regions (the list
1249       // might not be empty at the beginning of cleanup; we might
1250       // still be working on its previous contents). So we do the
1251       // printing here, before we append the new regions to the global
1252       // cleanup list.
1253 
1254       G1HRPrinter* hr_printer = _g1h->hr_printer();
1255       if (hr_printer->is_active()) {
1256         FreeRegionListIterator iter(&local_cleanup_list);
1257         while (iter.more_available()) {
1258           HeapRegion* hr = iter.get_next();
1259           hr_printer->cleanup(hr);
1260         }
1261       }
1262 
1263       _cleanup_list->add_ordered(&local_cleanup_list);
1264       assert(local_cleanup_list.is_empty(), "post-condition");
1265 
1266       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1267     }
1268   }
1269 };
1270 
1271 void G1ConcurrentMark::cleanup() {
1272   // world is stopped at this checkpoint
1273   assert(SafepointSynchronize::is_at_safepoint(),
1274          "world should be stopped");
1275   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1276 
1277   // If a full collection has happened, we shouldn't do this.
1278   if (has_aborted()) {
1279     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1280     return;
1281   }
1282 
1283   g1h->verifier()->verify_region_sets_optional();
1284 
1285   if (VerifyDuringGC) {
1286     HandleMark hm;  // handle scope
1287     g1h->prepare_for_verify();
1288     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1289   }
1290   g1h->verifier()->check_bitmaps("Cleanup Start");
1291 
1292   G1Policy* g1p = g1h->g1_policy();
1293   g1p->record_concurrent_mark_cleanup_start();
1294 
1295   double start = os::elapsedTime();
1296 
1297   HeapRegionRemSet::reset_for_cleanup_tasks();
1298 
1299   {
1300     GCTraceTime(Debug, gc)("Finalize Live Data");
1301     finalize_live_data();
1302   }
1303 
1304   if (VerifyDuringGC) {
1305     GCTraceTime(Debug, gc)("Verify Live Data");
1306     verify_live_data();
1307   }
1308 
1309   g1h->collector_state()->set_mark_in_progress(false);
1310 
1311   double count_end = os::elapsedTime();
1312   double this_final_counting_time = (count_end - start);
1313   _total_counting_time += this_final_counting_time;
1314 
1315   if (log_is_enabled(Trace, gc, liveness)) {
1316     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1317     _g1h->heap_region_iterate(&cl);
1318   }
1319 
1320   // Install newly created mark bitMap as "prev".
1321   swapMarkBitMaps();
1322 
1323   g1h->reset_gc_time_stamp();
1324 
1325   uint n_workers = _g1h->workers()->active_workers();
1326 
1327   // Note end of marking in all heap regions.
1328   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1329   g1h->workers()->run_task(&g1_par_note_end_task);
1330   g1h->check_gc_time_stamps();
1331 
1332   if (!cleanup_list_is_empty()) {
1333     // The cleanup list is not empty, so we'll have to process it
1334     // concurrently. Notify anyone else that might be wanting free
1335     // regions that there will be more free regions coming soon.
1336     g1h->set_free_regions_coming();
1337   }
1338 
1339   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1340   // call below, since it affects the metric by which we sort the heap regions.
1341   if (G1ScrubRemSets) {
1342     double rs_scrub_start = os::elapsedTime();
1343     g1h->scrub_rem_set();
1344     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1345   }
1346 
1347   // this will also free any regions totally full of garbage objects,
1348   // and sort the regions.
1349   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1350 
1351   // Statistics.
1352   double end = os::elapsedTime();
1353   _cleanup_times.add((end - start) * 1000.0);
1354 
1355   // Clean up will have freed any regions completely full of garbage.
1356   // Update the soft reference policy with the new heap occupancy.
1357   Universe::update_heap_info_at_gc();
1358 
1359   if (VerifyDuringGC) {
1360     HandleMark hm;  // handle scope
1361     g1h->prepare_for_verify();
1362     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1363   }
1364 
1365   g1h->verifier()->check_bitmaps("Cleanup End");
1366 
1367   g1h->verifier()->verify_region_sets_optional();
1368 
1369   // We need to make this be a "collection" so any collection pause that
1370   // races with it goes around and waits for completeCleanup to finish.
1371   g1h->increment_total_collections();
1372 
1373   // Clean out dead classes and update Metaspace sizes.
1374   if (ClassUnloadingWithConcurrentMark) {
1375     ClassLoaderDataGraph::purge();
1376   }
1377   MetaspaceGC::compute_new_size();
1378 
1379   // We reclaimed old regions so we should calculate the sizes to make
1380   // sure we update the old gen/space data.
1381   g1h->g1mm()->update_sizes();
1382   g1h->allocation_context_stats().update_after_mark();
1383 }
1384 
1385 void G1ConcurrentMark::complete_cleanup() {
1386   if (has_aborted()) return;
1387 
1388   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1389 
1390   _cleanup_list.verify_optional();
1391   FreeRegionList tmp_free_list("Tmp Free List");
1392 
1393   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1394                                   "cleanup list has %u entries",
1395                                   _cleanup_list.length());
1396 
1397   // No one else should be accessing the _cleanup_list at this point,
1398   // so it is not necessary to take any locks
1399   while (!_cleanup_list.is_empty()) {
1400     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1401     assert(hr != NULL, "Got NULL from a non-empty list");
1402     hr->par_clear();
1403     tmp_free_list.add_ordered(hr);
1404 
1405     // Instead of adding one region at a time to the secondary_free_list,
1406     // we accumulate them in the local list and move them a few at a
1407     // time. This also cuts down on the number of notify_all() calls
1408     // we do during this process. We'll also append the local list when
1409     // _cleanup_list is empty (which means we just removed the last
1410     // region from the _cleanup_list).
1411     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1412         _cleanup_list.is_empty()) {
1413       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1414                                       "appending %u entries to the secondary_free_list, "
1415                                       "cleanup list still has %u entries",
1416                                       tmp_free_list.length(),
1417                                       _cleanup_list.length());
1418 
1419       {
1420         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1421         g1h->secondary_free_list_add(&tmp_free_list);
1422         SecondaryFreeList_lock->notify_all();
1423       }
1424 #ifndef PRODUCT
1425       if (G1StressConcRegionFreeing) {
1426         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1427           os::sleep(Thread::current(), (jlong) 1, false);
1428         }
1429       }
1430 #endif
1431     }
1432   }
1433   assert(tmp_free_list.is_empty(), "post-condition");
1434 }
1435 
1436 // Supporting Object and Oop closures for reference discovery
1437 // and processing during marking
1438 
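     // An object is considered alive if it lies outside the heap area reserved
     // for G1 (such referents are never reclaimed by concurrent marking), or if
     // it is not "ill", i.e. not known to be dead with respect to the marking
     // in progress (see G1CollectedHeap::is_obj_ill() for the exact test).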
1439 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1440   HeapWord* addr = (HeapWord*)obj;
1441   return addr != NULL &&
1442          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1443 }
1444 
1445 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1446 // Uses the G1CMTask associated with a worker thread (for serial reference
1447 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1448 // trace referent objects.
1449 //
1450 // Using the G1CMTask and embedded local queues avoids having the worker
1451 // threads operating on the global mark stack. This reduces the risk
1452 // of overflowing the stack - which we would rather avoid at this late
1453 // stage. Also using the tasks' local queues removes the potential
1454 // of the workers interfering with each other that could occur if
1455 // operating on the global stack.
1456 
1457 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1458   G1ConcurrentMark* _cm;
1459   G1CMTask*         _task;
1460   int               _ref_counter_limit;
1461   int               _ref_counter;
1462   bool              _is_serial;
1463  public:
1464   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1465     _cm(cm), _task(task), _is_serial(is_serial),
1466     _ref_counter_limit(G1RefProcDrainInterval) {
1467     assert(_ref_counter_limit > 0, "sanity");
1468     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1469     _ref_counter = _ref_counter_limit;
1470   }
1471 
1472   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1473   virtual void do_oop(      oop* p) { do_oop_work(p); }
1474 
1475   template <class T> void do_oop_work(T* p) {
1476     if (!_cm->has_overflown()) {
1477       oop obj = oopDesc::load_decode_heap_oop(p);
1478       _task->deal_with_reference(obj);
1479       _ref_counter--;
1480 
1481       if (_ref_counter == 0) {
1482         // We have dealt with _ref_counter_limit references, pushing them
1483         // and objects reachable from them on to the local stack (and
1484         // possibly the global stack). Call G1CMTask::do_marking_step() to
1485         // process these entries.
1486         //
1487         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1488         // there's nothing more to do (i.e. we're done with the entries that
1489         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1490         // above) or we overflow.
1491         //
1492         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1493         // flag while there may still be some work to do. (See the comment at
1494         // the beginning of G1CMTask::do_marking_step() for those conditions -
1495         // one of which is reaching the specified time target.) It is only
1496         // when G1CMTask::do_marking_step() returns without setting the
1497         // has_aborted() flag that the marking step has completed.
1498         do {
1499           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1500           _task->do_marking_step(mark_step_duration_ms,
1501                                  false      /* do_termination */,
1502                                  _is_serial);
1503         } while (_task->has_aborted() && !_cm->has_overflown());
1504         _ref_counter = _ref_counter_limit;
1505       }
1506     }
1507   }
1508 };
1509 
1510 // 'Drain' oop closure used by both serial and parallel reference processing.
1511 // Uses the G1CMTask associated with a given worker thread (for serial
1512 // reference processing the G1CMTask for worker 0 is used). Calls the
1513 // do_marking_step routine, with an unbelievably large timeout value,
1514 // to drain the marking data structures of the remaining entries
1515 // added by the 'keep alive' oop closure above.
1516 
1517 class G1CMDrainMarkingStackClosure: public VoidClosure {
1518   G1ConcurrentMark* _cm;
1519   G1CMTask*         _task;
1520   bool              _is_serial;
1521  public:
1522   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1523     _cm(cm), _task(task), _is_serial(is_serial) {
1524     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1525   }
1526 
1527   void do_void() {
1528     do {
1529       // We call G1CMTask::do_marking_step() to completely drain the local
1530       // and global marking stacks of entries pushed by the 'keep alive'
1531       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1532       //
1533       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1534       // if there's nothing more to do (i.e. we've completely drained the
1535       // entries that were pushed as a result of applying the 'keep alive'
1536       // closure to the entries on the discovered ref lists) or we overflow
1537       // the global marking stack.
1538       //
1539       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1540       // flag while there may still be some work to do. (See the comment at
1541       // the beginning of G1CMTask::do_marking_step() for those conditions -
1542       // one of which is reaching the specified time target.) It is only
1543       // when G1CMTask::do_marking_step() returns without setting the
1544       // has_aborted() flag that the marking step has completed.
1545 
1546       _task->do_marking_step(1000000000.0 /* something very large */,
1547                              true         /* do_termination */,
1548                              _is_serial);
1549     } while (_task->has_aborted() && !_cm->has_overflown());
1550   }
1551 };
1552 
1553 // Implementation of AbstractRefProcTaskExecutor for parallel
1554 // reference processing at the end of G1 concurrent marking
1555 
1556 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1557 private:
1558   G1CollectedHeap*  _g1h;
1559   G1ConcurrentMark* _cm;
1560   WorkGang*         _workers;
1561   uint              _active_workers;
1562 
1563 public:
1564   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1565                           G1ConcurrentMark* cm,
1566                           WorkGang* workers,
1567                           uint n_workers) :
1568     _g1h(g1h), _cm(cm),
1569     _workers(workers), _active_workers(n_workers) { }
1570 
1571   // Executes the given task using concurrent marking worker threads.
1572   virtual void execute(ProcessTask& task);
1573   virtual void execute(EnqueueTask& task);
1574 };
1575 
1576 class G1CMRefProcTaskProxy: public AbstractGangTask {
1577   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1578   ProcessTask&      _proc_task;
1579   G1CollectedHeap*  _g1h;
1580   G1ConcurrentMark* _cm;
1581 
1582 public:
1583   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1584                        G1CollectedHeap* g1h,
1585                        G1ConcurrentMark* cm) :
1586     AbstractGangTask("Process reference objects in parallel"),
1587     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1588     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1589     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1590   }
1591 
1592   virtual void work(uint worker_id) {
1593     ResourceMark rm;
1594     HandleMark hm;
1595     G1CMTask* task = _cm->task(worker_id);
1596     G1CMIsAliveClosure g1_is_alive(_g1h);
1597     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1598     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1599 
1600     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1601   }
1602 };
1603 
1604 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1605   assert(_workers != NULL, "Need parallel worker threads.");
1606   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1607 
1608   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1609 
1610   // We need to reset the concurrency level before each
1611   // proxy task execution, so that the termination protocol
1612   // and overflow handling in G1CMTask::do_marking_step() know
1613   // how many workers to wait for.
1614   _cm->set_concurrency(_active_workers);
1615   _workers->run_task(&proc_task_proxy);
1616 }
1617 
1618 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1619   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1620   EnqueueTask& _enq_task;
1621 
1622 public:
1623   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1624     AbstractGangTask("Enqueue reference objects in parallel"),
1625     _enq_task(enq_task) { }
1626 
1627   virtual void work(uint worker_id) {
1628     _enq_task.work(worker_id);
1629   }
1630 };
1631 
1632 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1633   assert(_workers != NULL, "Need parallel worker threads.");
1634   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1635 
1636   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1637 
1638   // Not strictly necessary but...
1639   //
1640   // We need to reset the concurrency level before each
1641   // proxy task execution, so that the termination protocol
1642   // and overflow handling in G1CMTask::do_marking_step() know
1643   // how many workers to wait for.
1644   _cm->set_concurrency(_active_workers);
1645   _workers->run_task(&enq_task_proxy);
1646 }
1647 
1648 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1649   if (has_overflown()) {
1650     // Skip processing the discovered references if we have
1651     // overflown the global marking stack. Reference objects
1652     // only get discovered once so it is OK to not
1653     // de-populate the discovered reference lists. We could have,
1654     // but the only benefit would be that, when marking restarts,
1655     // fewer reference objects are discovered.
1656     return;
1657   }
1658 
1659   ResourceMark rm;
1660   HandleMark   hm;
1661 
1662   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1663 
1664   // Is alive closure.
1665   G1CMIsAliveClosure g1_is_alive(g1h);
1666 
1667   // Inner scope to exclude the cleaning of the string and symbol
1668   // tables from the displayed time.
1669   {
1670     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1671 
1672     ReferenceProcessor* rp = g1h->ref_processor_cm();
1673 
1674     // See the comment in G1CollectedHeap::ref_processing_init()
1675     // about how reference processing currently works in G1.
1676 
1677     // Set the soft reference policy
1678     rp->setup_policy(clear_all_soft_refs);
1679     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1680 
1681     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1682     // in serial reference processing. Note these closures are also
1683     // used for serially processing (by the current thread) the
1684     // JNI references during parallel reference processing.
1685     //
1686     // These closures do not need to synchronize with the worker
1687     // threads involved in parallel reference processing as these
1688     // instances are executed serially by the current thread (i.e.
1689     // reference processing is not multi-threaded and is thus
1690     // performed by the current thread instead of a gang worker).
1691     //
1692     // The gang tasks involved in parallel reference processing create
1693     // their own instances of these closures, which do their own
1694     // synchronization among themselves.
1695     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1696     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1697 
1698     // We need at least one active thread. If reference processing
1699     // is not multi-threaded we use the current (VMThread) thread,
1700     // otherwise we use the work gang from the G1CollectedHeap and
1701     // we utilize all the worker threads we can.
1702     bool processing_is_mt = rp->processing_is_mt();
1703     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
1704     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
1705 
1706     // Parallel processing task executor.
1707     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
1708                                               g1h->workers(), active_workers);
1709     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1710 
1711     // Set the concurrency level. The phase was already set prior to
1712     // executing the remark task.
1713     set_concurrency(active_workers);
1714 
1715     // Set the degree of MT processing here.  If the discovery was done MT,
1716     // the number of threads involved during discovery could differ from
1717     // the number of active workers.  This is OK as long as the discovered
1718     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1719     rp->set_active_mt_degree(active_workers);
1720 
1721     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1722 
1723     // Process the weak references.
1724     const ReferenceProcessorStats& stats =
1725         rp->process_discovered_references(&g1_is_alive,
1726                                           &g1_keep_alive,
1727                                           &g1_drain_mark_stack,
1728                                           executor,
1729                                           &pt);
1730     _gc_tracer_cm->report_gc_reference_stats(stats);
1731     pt.print_all_references();
1732 
1733     // The do_oop work routines of the keep_alive and drain_marking_stack
1734     // oop closures will set the has_overflown flag if we overflow the
1735     // global marking stack.
1736 
1737     assert(has_overflown() || _global_mark_stack.is_empty(),
1738             "Mark stack should be empty (unless it has overflown)");
1739 
1740     assert(rp->num_q() == active_workers, "the number of reference queues should match the active worker count");
1741 
1742     rp->enqueue_discovered_references(executor, &pt);
1743 
1744     rp->verify_no_references_recorded();
1745 
1746     pt.print_enqueue_phase();
1747 
1748     assert(!rp->discovery_enabled(), "Post condition");
1749   }
1750 
1751   if (has_overflown()) {
1752     // We cannot trust g1_is_alive if the marking stack overflowed.
1753     return;
1754   }
1755 
1756   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1757 
1758   // Unload Klasses, String, Symbols, Code Cache, etc.
1759   if (ClassUnloadingWithConcurrentMark) {
1760     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1761     bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1762     g1h->complete_cleaning(&g1_is_alive, purged_classes);
1763   } else {
1764     GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1765     // No need to clean string table and symbol table as they are treated as strong roots when
1766     // class unloading is disabled.
1767     g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1769   }
1770 }
1771 
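     // Swap the per-cycle marking bitmaps. Called at cleanup once marking has
     // completed: the fully populated "next" bitmap becomes the new "prev" bitmap,
     // and the old "prev" bitmap is subsequently cleared and reused as "next" for
     // the following marking cycle.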
1772 void G1ConcurrentMark::swapMarkBitMaps() {
1773   G1CMBitMapRO* temp = _prevMarkBitMap;
1774   _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
1775   _nextMarkBitMap    = (G1CMBitMap*)  temp;
1776 }
1777 
1778 // Closure for marking entries in SATB buffers.
1779 class G1CMSATBBufferClosure : public SATBBufferClosure {
1780 private:
1781   G1CMTask* _task;
1782   G1CollectedHeap* _g1h;
1783 
1784   // This is very similar to G1CMTask::deal_with_reference, but with
1785   // more relaxed requirements for the argument, so this must be more
1786   // circumspect about treating the argument as an object.
1787   void do_entry(void* entry) const {
1788     _task->increment_refs_reached();
1789     HeapRegion* hr = _g1h->heap_region_containing(entry);
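         // Entries at or above NTAMS need no explicit marking here: anything
         // allocated in that part of the region since marking started is treated
         // as implicitly live (and such entries may also be stale references).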
1790     if (entry < hr->next_top_at_mark_start()) {
1791       // Until we get here, we don't know whether entry refers to a valid
1792       // object; it could instead have been a stale reference.
1793       oop obj = static_cast<oop>(entry);
1794       assert(obj->is_oop(true /* ignore mark word */),
1795              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
1796       _task->make_reference_grey(obj);
1797     }
1798   }
1799 
1800 public:
1801   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1802     : _task(task), _g1h(g1h) { }
1803 
1804   virtual void do_buffer(void** buffer, size_t size) {
1805     for (size_t i = 0; i < size; ++i) {
1806       do_entry(buffer[i]);
1807     }
1808   }
1809 };
1810 
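     // Applied to every thread during remark. Each Java thread is claimed at most
     // once (using the claim parity); its nmethods are walked for roots and its
     // SATB buffer is drained. For the VM thread the shared SATB queue is drained.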
1811 class G1RemarkThreadsClosure : public ThreadClosure {
1812   G1CMSATBBufferClosure _cm_satb_cl;
1813   G1CMOopClosure _cm_cl;
1814   MarkingCodeBlobClosure _code_cl;
1815   int _thread_parity;
1816 
1817  public:
1818   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1819     _cm_satb_cl(task, g1h),
1820     _cm_cl(g1h, g1h->concurrent_mark(), task),
1821     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1822     _thread_parity(Threads::thread_claim_parity()) {}
1823 
1824   void do_thread(Thread* thread) {
1825     if (thread->is_Java_thread()) {
1826       if (thread->claim_oops_do(true, _thread_parity)) {
1827         JavaThread* jt = (JavaThread*)thread;
1828 
1829         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking.
1830         // However, oops reachable from nmethods have very complex lifecycles:
1831         // * Alive if on the stack of an executing method
1832         // * Weakly reachable otherwise
1833         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1834         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1835         jt->nmethods_do(&_code_cl);
1836 
1837         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1838       }
1839     } else if (thread->is_VM_thread()) {
1840       if (thread->claim_oops_do(true, _thread_parity)) {
1841         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1842       }
1843     }
1844   }
1845 };
1846 
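     // Gang task that runs the final (remark) marking step: each active worker
     // first scans the thread roots via G1RemarkThreadsClosure and then calls
     // do_marking_step() in a loop until it either finishes or marking overflows.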
1847 class G1CMRemarkTask: public AbstractGangTask {
1848 private:
1849   G1ConcurrentMark* _cm;
1850 public:
1851   void work(uint worker_id) {
1852     // Since all available tasks are actually started, we should
1853     // only proceed if we're supposed to be active.
1854     if (worker_id < _cm->active_tasks()) {
1855       G1CMTask* task = _cm->task(worker_id);
1856       task->record_start_time();
1857       {
1858         ResourceMark rm;
1859         HandleMark hm;
1860 
1861         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1862         Threads::threads_do(&threads_f);
1863       }
1864 
1865       do {
1866         task->do_marking_step(1000000000.0 /* something very large */,
1867                               true         /* do_termination       */,
1868                               false        /* is_serial            */);
1869       } while (task->has_aborted() && !_cm->has_overflown());
1870       // If we overflow, then we do not want to restart. We instead
1871       // want to abort remark and do concurrent marking again.
1872       task->record_end_time();
1873     }
1874   }
1875 
1876   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1877     AbstractGangTask("Par Remark"), _cm(cm) {
1878     _cm->terminator()->reset_for_reuse(active_workers);
1879   }
1880 };
1881 
1882 void G1ConcurrentMark::checkpointRootsFinalWork() {
1883   ResourceMark rm;
1884   HandleMark   hm;
1885   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1886 
1887   GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1888 
1889   g1h->ensure_parsability(false);
1890 
1891   // this is remark, so we'll use up all active threads
1892   uint active_workers = g1h->workers()->active_workers();
1893   set_concurrency_and_phase(active_workers, false /* concurrent */);
1894   // Leave _parallel_marking_threads at its
1895   // value originally calculated in the G1ConcurrentMark
1896   // constructor and pass the number of active workers
1897   // through to the gang via the task.
1898 
1899   {
1900     StrongRootsScope srs(active_workers);
1901 
1902     G1CMRemarkTask remarkTask(this, active_workers);
1903     // We will start all available threads, even if we decide that the
1904     // active_workers will be fewer. The extra ones will just bail out
1905     // immediately.
1906     g1h->workers()->run_task(&remarkTask);
1907   }
1908 
1909   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1910   guarantee(has_overflown() ||
1911             satb_mq_set.completed_buffers_num() == 0,
1912             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1913             BOOL_TO_STR(has_overflown()),
1914             satb_mq_set.completed_buffers_num());
1915 
1916   print_stats();
1917 }
1918 
1919 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
1920   // Note we are overriding the read-only view of the prev map here, via
1921   // the cast.
1922   ((G1CMBitMap*)_prevMarkBitMap)->clear_range(mr);
1923 }
1924 
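     // Claims the next region to be scanned by a marking task. The global finger
     // marks the start of the unclaimed portion of the heap: a task reads the
     // finger, computes the end of the region containing it and tries to advance
     // the finger to that end with a CAS. Only the task whose CAS succeeds owns
     // the region; the others re-read the finger and retry.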
1925 HeapRegion*
1926 G1ConcurrentMark::claim_region(uint worker_id) {
1927   // "checkpoint" the finger
1928   HeapWord* finger = _finger;
1929 
1930   // _heap_end will not change underneath our feet; it only changes at
1931   // yield points.
1932   while (finger < _heap_end) {
1933     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1934 
1935     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1936     // Make sure that the reads below do not float before loading curr_region.
1937     OrderAccess::loadload();
1938     // heap_region_containing() above may return NULL because we always claim
1939     // regions up to the end of the heap. In this case, just jump to the next region.
1940     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1941 
1942     // Is the gap between reading the finger and doing the CAS too long?
1943     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
1944     if (res == finger && curr_region != NULL) {
1945       // we succeeded
1946       HeapWord*   bottom        = curr_region->bottom();
1947       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1948 
1949       // Notice that _finger == end cannot be guaranteed here, since
1950       // someone else might have moved the finger even further.
1951       assert(_finger >= end, "the finger should have moved forward");
1952 
1953       if (limit > bottom) {
1954         return curr_region;
1955       } else {
1956         assert(limit == bottom,
1957                "the region limit should be at bottom");
1958         // we return NULL and the caller should try calling
1959         // claim_region() again.
1960         return NULL;
1961       }
1962     } else {
1963       assert(_finger > finger, "the finger should have moved forward");
1964       // read it again
1965       finger = _finger;
1966     }
1967   }
1968 
1969   return NULL;
1970 }
1971 
1972 #ifndef PRODUCT
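     // Debug-only functor applied to every entry on the global mark stack and on
     // the per-task queues: each entry must be either an array slice inside the
     // reserved heap or a valid oop that does not point into the collection set.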
1973 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1974 private:
1975   G1CollectedHeap* _g1h;
1976   const char* _phase;
1977   int _info;
1978 
1979 public:
1980   VerifyNoCSetOops(const char* phase, int info = -1) :
1981     _g1h(G1CollectedHeap::heap()),
1982     _phase(phase),
1983     _info(info)
1984   { }
1985 
1986   void operator()(G1TaskQueueEntry task_entry) const {
1987     if (task_entry.is_array_slice()) {
1988       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1989       return;
1990     }
1991     guarantee(task_entry.obj()->is_oop(),
1992               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1993               p2i(task_entry.obj()), _phase, _info);
1994     guarantee(!_g1h->is_in_cset(task_entry.obj()),
1995               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1996               p2i(task_entry.obj()), _phase, _info);
1997   }
1998 };
1999 
2000 void G1ConcurrentMark::verify_no_cset_oops() {
2001   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2002   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2003     return;
2004   }
2005 
2006   // Verify entries on the global mark stack
2007   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
2008 
2009   // Verify entries on the task queues
2010   for (uint i = 0; i < _max_worker_id; ++i) {
2011     G1CMTaskQueue* queue = _task_queues->queue(i);
2012     queue->iterate(VerifyNoCSetOops("Queue", i));
2013   }
2014 
2015   // Verify the global finger
2016   HeapWord* global_finger = finger();
2017   if (global_finger != NULL && global_finger < _heap_end) {
2018     // Since we always iterate over all regions, we might get a NULL HeapRegion
2019     // here.
2020     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2021     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2022               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2023               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2024   }
2025 
2026   // Verify the task fingers
2027   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2028   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2029     G1CMTask* task = _tasks[i];
2030     HeapWord* task_finger = task->finger();
2031     if (task_finger != NULL && task_finger < _heap_end) {
2032       // See above note on the global finger verification.
2033       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2034       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2035                 !task_hr->in_collection_set(),
2036                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2037                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2038     }
2039   }
2040 }
2041 #endif // PRODUCT
2042 void G1ConcurrentMark::create_live_data() {
2043   _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap);
2044 }
2045 
2046 void G1ConcurrentMark::finalize_live_data() {
2047   _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap);
2048 }
2049 
2050 void G1ConcurrentMark::verify_live_data() {
2051   _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap);
2052 }
2053 
2054 void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
2055   _g1h->g1_rem_set()->clear_card_live_data(workers);
2056 }
2057 
2058 #ifdef ASSERT
2059 void G1ConcurrentMark::verify_live_data_clear() {
2060   _g1h->g1_rem_set()->verify_card_live_data_is_clear();
2061 }
2062 #endif
2063 
2064 void G1ConcurrentMark::print_stats() {
2065   if (!log_is_enabled(Debug, gc, stats)) {
2066     return;
2067   }
2068   log_debug(gc, stats)("---------------------------------------------------------------------");
2069   for (size_t i = 0; i < _active_tasks; ++i) {
2070     _tasks[i]->print_stats();
2071     log_debug(gc, stats)("---------------------------------------------------------------------");
2072   }
2073 }
2074 
2075 void G1ConcurrentMark::abort() {
2076   if (!cmThread()->during_cycle() || _has_aborted) {
2077     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2078     return;
2079   }
2080 
2081   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2082   // concurrent bitmap clearing.
2083   {
2084     GCTraceTime(Debug, gc)("Clear Next Bitmap");
2085     clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
2086   }
2087   // Note we cannot clear the previous marking bitmap here
2088   // since VerifyDuringGC verifies the objects marked during
2089   // a full GC against the previous bitmap.
2090 
2091   {
2092     GCTraceTime(Debug, gc)("Clear Live Data");
2093     clear_live_data(_g1h->workers());
2094   }
2095   DEBUG_ONLY({
2096     GCTraceTime(Debug, gc)("Verify Live Data Clear");
2097     verify_live_data_clear();
2098   })
2099   // Empty mark stack
2100   reset_marking_state();
2101   for (uint i = 0; i < _max_worker_id; ++i) {
2102     _tasks[i]->clear_region_fields();
2103   }
2104   _first_overflow_barrier_sync.abort();
2105   _second_overflow_barrier_sync.abort();
2106   _has_aborted = true;
2107 
2108   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2109   satb_mq_set.abandon_partial_marking();
2110   // This can be called either during or outside marking, we'll read
2111   // the expected_active value from the SATB queue set.
2112   satb_mq_set.set_active_all_threads(
2113                                  false, /* new active value */
2114                                  satb_mq_set.is_active() /* expected_active */);
2115 }
2116 
2117 static void print_ms_time_info(const char* prefix, const char* name,
2118                                NumberSeq& ns) {
2119   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2120                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2121   if (ns.num() > 0) {
2122     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2123                            prefix, ns.sd(), ns.maximum());
2124   }
2125 }
2126 
2127 void G1ConcurrentMark::print_summary_info() {
2128   Log(gc, marking) log;
2129   if (!log.is_trace()) {
2130     return;
2131   }
2132 
2133   log.trace(" Concurrent marking:");
2134   print_ms_time_info("  ", "init marks", _init_times);
2135   print_ms_time_info("  ", "remarks", _remark_times);
2136   {
2137     print_ms_time_info("     ", "final marks", _remark_mark_times);
2138     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2140   }
2141   print_ms_time_info("  ", "cleanups", _cleanup_times);
2142   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2143             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2144   if (G1ScrubRemSets) {
2145     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2146               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2147   }
2148   log.trace("  Total stop_world time = %8.2f s.",
2149             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2150   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2151             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2152 }
2153 
2154 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2155   _parallel_workers->print_worker_threads_on(st);
2156 }
2157 
2158 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2159   _parallel_workers->threads_do(tc);
2160 }
2161 
2162 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2163   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2164       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2165   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2166   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2167 }
2168 
2169 // Closure for iteration over bitmaps
2170 class G1CMBitMapClosure : public BitMapClosure {
2171 private:
2172   // the bitmap that is being iterated over
2173   G1CMBitMap*                 _nextMarkBitMap;
2174   G1ConcurrentMark*           _cm;
2175   G1CMTask*                   _task;
2176 
2177 public:
2178   G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2179     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2180 
2181   bool do_bit(size_t offset) {
2182     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2183     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2184     assert(addr < _cm->finger(), "invariant");
2185     assert(addr >= _task->finger(), "invariant");
2186 
2187     // We move that task's local finger along.
2188     _task->move_finger_to(addr);
2189 
2190     _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
2191     // we only partially drain the local queue and global stack
2192     _task->drain_local_queue(true);
2193     _task->drain_global_stack(true);
2194 
2195     // if the has_aborted flag has been raised, we need to bail out of
2196     // the iteration
2197     return !_task->has_aborted();
2198   }
2199 };
2200 
2201 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2202   ReferenceProcessor* result = g1h->ref_processor_cm();
2203   assert(result != NULL, "CM reference processor should not be NULL");
2204   return result;
2205 }
2206 
2207 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2208                                G1ConcurrentMark* cm,
2209                                G1CMTask* task)
2210   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2211     _g1h(g1h), _cm(cm), _task(task)
2212 { }
2213 
2214 void G1CMTask::setup_for_region(HeapRegion* hr) {
2215   assert(hr != NULL,
2216         "claim_region() should have filtered out NULL regions");
2217   _curr_region  = hr;
2218   _finger       = hr->bottom();
2219   update_region_limit();
2220 }
2221 
2222 void G1CMTask::update_region_limit() {
2223   HeapRegion* hr            = _curr_region;
2224   HeapWord* bottom          = hr->bottom();
2225   HeapWord* limit           = hr->next_top_at_mark_start();
2226 
2227   if (limit == bottom) {
2228     // The region was collected underneath our feet.
2229     // We set the finger to bottom to ensure that the bitmap
2230     // iteration that will follow this will not do anything.
2231     // (this is not a condition that holds when we set the region up,
2232     // as the region is not supposed to be empty in the first place)
2233     _finger = bottom;
2234   } else if (limit >= _region_limit) {
2235     assert(limit >= _finger, "peace of mind");
2236   } else {
2237     assert(limit < _region_limit, "only way to get here");
2238     // This can happen under some pretty unusual circumstances.  An
2239     // evacuation pause empties the region underneath our feet (NTAMS
2240     // at bottom). We then do some allocation in the region (NTAMS
2241     // stays at bottom), followed by the region being used as a GC
2242     // alloc region (NTAMS will move to top() and the objects
2243     // originally below it will be grayed). All objects now marked in
2244     // the region are explicitly grayed, if below the global finger,
2245     // and in fact we do not need to scan anything else. So, we simply
2246     // set _finger to be limit to ensure that the bitmap iteration
2247     // doesn't do anything.
2248     _finger = limit;
2249   }
2250 
2251   _region_limit = limit;
2252 }
2253 
2254 void G1CMTask::giveup_current_region() {
2255   assert(_curr_region != NULL, "invariant");
2256   clear_region_fields();
2257 }
2258 
2259 void G1CMTask::clear_region_fields() {
2260   // Values for these three fields that indicate that we're not
2261   // holding on to a region.
2262   _curr_region   = NULL;
2263   _finger        = NULL;
2264   _region_limit  = NULL;
2265 }
2266 
2267 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2268   if (cm_oop_closure == NULL) {
2269     assert(_cm_oop_closure != NULL, "invariant");
2270   } else {
2271     assert(_cm_oop_closure == NULL, "invariant");
2272   }
2273   _cm_oop_closure = cm_oop_closure;
2274 }
2275 
2276 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2277   guarantee(nextMarkBitMap != NULL, "invariant");
2278   _nextMarkBitMap                = nextMarkBitMap;
2279   clear_region_fields();
2280 
2281   _calls                         = 0;
2282   _elapsed_time_ms               = 0.0;
2283   _termination_time_ms           = 0.0;
2284   _termination_start_time_ms     = 0.0;
2285 }
2286 
2287 bool G1CMTask::should_exit_termination() {
2288   regular_clock_call();
2289   // This is called when we are in the termination protocol. We should
2290   // quit if, for some reason, this task wants to abort or the global
2291   // stack is not empty (this means that we can get work from it).
2292   return !_cm->mark_stack_empty() || has_aborted();
2293 }
2294 
2295 void G1CMTask::reached_limit() {
2296   assert(_words_scanned >= _words_scanned_limit ||
2297          _refs_reached >= _refs_reached_limit,
2298          "shouldn't have been called otherwise");
2299   regular_clock_call();
2300 }
2301 
2302 void G1CMTask::regular_clock_call() {
2303   if (has_aborted()) return;
2304 
2305   // First, we need to recalculate the words scanned and refs reached
2306   // limits for the next clock call.
2307   recalculate_limits();
2308 
2309   // During the regular clock call we do the following
2310 
2311   // (1) If an overflow has been flagged, then we abort.
2312   if (_cm->has_overflown()) {
2313     set_has_aborted();
2314     return;
2315   }
2316 
2317   // If we are not concurrent (i.e. we're doing remark) we don't need
2318   // to check anything else. The other steps are only needed during
2319   // the concurrent marking phase.
2320   if (!concurrent()) return;
2321 
2322   // (2) If marking has been aborted for Full GC, then we also abort.
2323   if (_cm->has_aborted()) {
2324     set_has_aborted();
2325     return;
2326   }
2327 
2328   double curr_time_ms = os::elapsedVTime() * 1000.0;
2329 
2330   // (3) We check whether we should yield. If we have to, then we abort.
2331   if (SuspendibleThreadSet::should_yield()) {
2332     // We should yield. To do this we abort the task. The caller is
2333     // responsible for yielding.
2334     set_has_aborted();
2335     return;
2336   }
2337 
2338   // (4) We check whether we've reached our time quota. If we have,
2339   // then we abort.
2340   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2341   if (elapsed_time_ms > _time_target_ms) {
2342     set_has_aborted();
2343     _has_timed_out = true;
2344     return;
2345   }
2346 
2347   // (5) Finally, we check whether there are enough completed SATB
2348   // buffers available for processing. If there are, we abort.
2349   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2350   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2351     // We do need to process SATB buffers, so we'll abort and restart
2352     // the marking task to do so.
2353     set_has_aborted();
2354     return;
2355   }
2356 }
2357 
2358 void G1CMTask::recalculate_limits() {
2359   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2360   _words_scanned_limit      = _real_words_scanned_limit;
2361 
2362   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2363   _refs_reached_limit       = _real_refs_reached_limit;
2364 }
2365 
2366 void G1CMTask::decrease_limits() {
2367   // This is called when we believe that we're going to do an infrequent
2368   // operation which will increase the per byte scanned cost (i.e. move
2369   // entries to/from the global stack). It basically tries to decrease the
2370   // scanning limit so that the clock is called earlier.
2371 
2372   _words_scanned_limit = _real_words_scanned_limit -
2373     3 * words_scanned_period / 4;
2374   _refs_reached_limit  = _real_refs_reached_limit -
2375     3 * refs_reached_period / 4;
2376 }
2377 
2378 void G1CMTask::move_entries_to_global_stack() {
2379   // Local array where we'll store the entries that will be popped
2380   // from the local queue.
2381   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2382 
2383   size_t n = 0;
2384   G1TaskQueueEntry task_entry;
2385   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2386     buffer[n] = task_entry;
2387     ++n;
2388   }
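       // A chunk that is not completely filled is terminated with an empty entry,
       // which get_entries_from_global_stack() uses to find the end of the chunk.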
2389   if (n < G1CMMarkStack::EntriesPerChunk) {
2390     buffer[n] = G1TaskQueueEntry();
2391   }
2392 
2393   if (n > 0) {
2394     if (!_cm->mark_stack_push(buffer)) {
2395       set_has_aborted();
2396     }
2397   }
2398 
2399   // This operation was quite expensive, so decrease the limits.
2400   decrease_limits();
2401 }
2402 
2403 bool G1CMTask::get_entries_from_global_stack() {
2404   // Local array where we'll store the entries that will be popped
2405   // from the global stack.
2406   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2407 
2408   if (!_cm->mark_stack_pop(buffer)) {
2409     return false;
2410   }
2411 
2412   // We did actually pop at least one entry.
2413   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2414     G1TaskQueueEntry task_entry = buffer[i];
2415     if (task_entry.is_null()) {
2416       break;
2417     }
2418     assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2419     bool success = _task_queue->push(task_entry);
2420     // We only call this when the local queue is empty or under a
2421     // given target limit. So, we do not expect this push to fail.
2422     assert(success, "invariant");
2423   }
2424 
2425   // This operation was quite expensive, so decrease the limits
2426   decrease_limits();
2427   return true;
2428 }
2429 
2430 void G1CMTask::drain_local_queue(bool partially) {
2431   if (has_aborted()) {
2432     return;
2433   }
2434 
2435   // Decide what the target size is, depending on whether we're going to
2436   // drain it partially (so that other tasks can steal if they run out
2437   // of things to do) or totally (at the very end).
2438   size_t target_size;
2439   if (partially) {
2440     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2441   } else {
2442     target_size = 0;
2443   }
2444 
2445   if (_task_queue->size() > target_size) {
2446     G1TaskQueueEntry entry;
2447     bool ret = _task_queue->pop_local(entry);
2448     while (ret) {
2449       scan_task_entry(entry);
2450       if (_task_queue->size() <= target_size || has_aborted()) {
2451         ret = false;
2452       } else {
2453         ret = _task_queue->pop_local(entry);
2454       }
2455     }
2456   }
2457 }
2458 
2459 void G1CMTask::drain_global_stack(bool partially) {
2460   if (has_aborted()) return;
2461 
2462   // We have a policy to drain the local queue before we attempt to
2463   // drain the global stack.
2464   assert(partially || _task_queue->size() == 0, "invariant");
2465 
2466   // Decide what the target size is, depending on whether we're going to
2467   // drain it partially (so that other tasks can steal if they run out
2468   // of things to do) or totally (at the very end).
2469   // Notice that when draining the global mark stack partially, due to the raciness
2470   // of the mark stack size update we might in fact drop below the target. But,
2471   // this is not a problem.
2472   // In case of total draining, we simply process until the global mark stack is
2473   // totally empty, disregarding the size counter.
2474   if (partially) {
2475     size_t const target_size = _cm->partial_mark_stack_size_target();
2476     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2477       if (get_entries_from_global_stack()) {
2478         drain_local_queue(partially);
2479       }
2480     }
2481   } else {
2482     while (!has_aborted() && get_entries_from_global_stack()) {
2483       drain_local_queue(partially);
2484     }
2485   }
2486 }
2487 
2488 // The SATB queue code makes several assumptions about whether to call the par or
2489 // non-par versions of the methods. This is why some of the code is
2490 // replicated. We should really get rid of the single-threaded version
2491 // of the code to simplify things.
2492 void G1CMTask::drain_satb_buffers() {
2493   if (has_aborted()) return;
2494 
2495   // We set this so that the regular clock knows that we're in the
2496   // middle of draining buffers and doesn't set the abort flag when it
2497   // notices that SATB buffers are available for draining. It'd be
2498   // very counterproductive if it did that. :-)
2499   _draining_satb_buffers = true;
2500 
2501   G1CMSATBBufferClosure satb_cl(this, _g1h);
2502   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2503 
2504   // This keeps claiming and applying the closure to completed buffers
2505   // until we run out of buffers or we need to abort.
2506   while (!has_aborted() &&
2507          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2508     regular_clock_call();
2509   }
2510 
2511   _draining_satb_buffers = false;
2512 
2513   assert(has_aborted() ||
2514          concurrent() ||
2515          satb_mq_set.completed_buffers_num() == 0, "invariant");
2516 
2517   // Again, this was a potentially expensive operation, so decrease the
2518   // limits to get the regular clock call early.
2519   decrease_limits();
2520 }
2521 
2522 void G1CMTask::print_stats() {
2523   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2524                        _worker_id, _calls);
2525   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2526                        _elapsed_time_ms, _termination_time_ms);
2527   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2528                        _step_times_ms.num(), _step_times_ms.avg(),
2529                        _step_times_ms.sd());
2530   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
2531                        _step_times_ms.maximum(), _step_times_ms.sum());
2532 }
2533 
2534 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2535   return _task_queues->steal(worker_id, hash_seed, task_entry);
2536 }
2537 
2538 /*****************************************************************************
2539 
2540     The do_marking_step(time_target_ms, ...) method is the building
2541     block of the parallel marking framework. It can be called in parallel
2542     with other invocations of do_marking_step() on different tasks
2543     (but only one per task, obviously) and concurrently with the
2544     mutator threads, or during remark, hence it eliminates the need
2545     for two versions of the code. When called during remark, it will
2546     pick up from where the task left off during the concurrent marking
2547     phase. Interestingly, tasks are also claimable during evacuation
2548     pauses, since do_marking_step() ensures that it aborts before
2549     it needs to yield.
2550 
2551     The data structures that it uses to do marking work are the
2552     following:
2553 
2554       (1) Marking Bitmap. If there are gray objects that appear only
2555       on the bitmap (this happens either when dealing with an overflow
2556       or when the initial marking phase has simply marked the roots
2557       and didn't push them on the stack), then tasks claim heap
2558       regions whose bitmap they then scan to find gray objects. A
2559       global finger indicates where the end of the last claimed region
2560       is. A local finger indicates how far into the region a task has
2561       scanned. The two fingers are used to determine how to gray an
2562       object (i.e. whether simply marking it is OK, as it will be
2563       visited by a task in the future, or whether it needs to be also
2564       pushed on a stack).
2565 
2566       (2) Local Queue. The local queue of the task which is accessed
2567       reasonably efficiently by the task. Other tasks can steal from
2568       it when they run out of work. Throughout the marking phase, a
2569       task attempts to keep its local queue short but not totally
2570       empty, so that entries are available for stealing by other
2571       tasks. Only when there is no more work will a task totally
2572       drain its local queue.
2573 
2574       (3) Global Mark Stack. This handles local queue overflow. During
2575       marking only sets of entries are moved between it and the local
2576       queues, as access to it requires a mutex and more fine-grain
2577       interaction with it which might cause contention. If it
2578       overflows, then the marking phase should restart and iterate
2579       over the bitmap to identify gray objects. Throughout the marking
2580       phase, tasks attempt to keep the global mark stack at a small
2581       length but not totally empty, so that entries are available for
2582       popping by other tasks. Only when there is no more work will tasks
2583       totally drain the global mark stack.
2584 
2585       (4) SATB Buffer Queue. This is where completed SATB buffers are
2586       made available. Buffers are regularly removed from this queue
2587       and scanned for roots, so that the queue doesn't get too
2588       long. During remark, all completed buffers are processed, as
2589       well as the filled in parts of any uncompleted buffers.
2590 
2591     The do_marking_step() method tries to abort when the time target
2592     has been reached. There are a few other cases when the
2593     do_marking_step() method also aborts:
2594 
2595       (1) When the marking phase has been aborted (after a Full GC).
2596 
2597       (2) When a global overflow (on the global stack) has been
2598       triggered. Before the task aborts, it will actually sync up with
2599       the other tasks to ensure that all the marking data structures
2600       (local queues, stacks, fingers etc.)  are re-initialized so that
2601       when do_marking_step() completes, the marking phase can
2602       immediately restart.
2603 
2604       (3) When enough completed SATB buffers are available. The
2605       do_marking_step() method only tries to drain SATB buffers right
2606       at the beginning. So, if enough buffers are available, the
2607       marking step aborts and the SATB buffers are processed at
2608       the beginning of the next invocation.
2609 
2610       (4) To yield. When we have to yield, we abort and yield
2611       right at the end of do_marking_step(). This saves us from a lot
2612       of hassle as, by yielding we might allow a Full GC. If this
2613       happens then objects will be compacted underneath our feet, the
2614       heap might shrink, etc. We save checking for this by just
2615       aborting and doing the yield right at the end.
2616 
2617     From the above it follows that the do_marking_step() method should
2618     be called in a loop (or, otherwise, regularly) until it completes.
2619 
2620     If a marking step completes without its has_aborted() flag being
2621     true, it means it has completed the current marking phase (and
2622     also all other marking tasks have done so and have all synced up).
2623 
2624     A method called regular_clock_call() is invoked "regularly" (in
2625     sub ms intervals) throughout marking. It is this clock method that
2626     checks all the abort conditions which were mentioned above and
2627     decides when the task should abort. A work-based scheme is used to
2628     trigger this clock method: when the number of object words the
2629     marking phase has scanned or the number of references the marking
2630     phase has visited reach a given limit. Additional invocations of
2631     the clock method have been planted in a few other strategic places
2632     too. The initial reason for the clock method was to avoid calling
2633     vtime too regularly, as it is quite expensive. So, once it was in
2634     place, it was natural to piggy-back all the other conditions on it
2635     too and not constantly check them throughout the code.
2636 
2637     If do_termination is true then do_marking_step will enter its
2638     termination protocol.
2639 
2640     The value of is_serial must be true when do_marking_step is being
2641     called serially (i.e. by the VMThread) and do_marking_step should
2642     skip any synchronization in the termination and overflow code.
2643     Examples include the serial remark code and the serial reference
2644     processing closures.
2645 
2646     The value of is_serial must be false when do_marking_step is
2647     being called by any of the worker threads in a work gang.
2648     Examples include the concurrent marking code (CMMarkingTask),
2649     the MT remark code, and the MT reference processing closures.
2650 
2651  *****************************************************************************/
2652 
2653 void G1CMTask::do_marking_step(double time_target_ms,
2654                                bool do_termination,
2655                                bool is_serial) {
2656   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2657   assert(concurrent() == _cm->concurrent(), "they should be the same");
2658 
2659   G1Policy* g1_policy = _g1h->g1_policy();
2660   assert(_task_queues != NULL, "invariant");
2661   assert(_task_queue != NULL, "invariant");
2662   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
2663 
2664   assert(!_claimed,
2665          "only one thread should claim this task at any one time");
2666 
2667   // OK, this doesn't safeguard against all possible scenarios, as it is
2668   // possible for two threads to set the _claimed flag at the same
2669   // time. But it is only for debugging purposes anyway and it will
2670   // catch most problems.
2671   _claimed = true;
2672 
2673   _start_time_ms = os::elapsedVTime() * 1000.0;
2674 
2675   // If do_stealing is true then do_marking_step will attempt to
2676   // steal work from the other G1CMTasks. It only makes sense to
2677   // enable stealing when the termination protocol is enabled
2678   // and do_marking_step() is not being called serially.
2679   bool do_stealing = do_termination && !is_serial;
2680 
2681   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2682   _time_target_ms = time_target_ms - diff_prediction_ms;
2683 
2684   // set up the variables that are used in the work-based scheme to
2685   // call the regular clock method
2686   _words_scanned = 0;
2687   _refs_reached  = 0;
2688   recalculate_limits();
2689 
2690   // clear all flags
2691   clear_has_aborted();
2692   _has_timed_out = false;
2693   _draining_satb_buffers = false;
2694 
2695   ++_calls;
2696 
2697   // Set up the bitmap and oop closures. Anything that uses them is
2698   // eventually called from this method, so it is OK to allocate these
2699   // statically.
2700   G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
2701   G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
2702   set_cm_oop_closure(&cm_oop_closure);
2703 
2704   if (_cm->has_overflown()) {
2705     // This can happen if the mark stack overflows during a GC pause
2706     // and this task, after a yield point, restarts. We have to abort
2707     // as we need to get into the overflow protocol which happens
2708     // right at the end of this task.
2709     set_has_aborted();
2710   }
2711 
2712   // First drain any available SATB buffers. After this, we will not
2713   // look at SATB buffers before the next invocation of this method.
2714   // If enough completed SATB buffers are queued up, the regular clock
2715   // will abort this task so that it restarts.
2716   drain_satb_buffers();
2717   // ...then partially drain the local queue and the global stack
2718   drain_local_queue(true);
2719   drain_global_stack(true);
2720 
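  // Main loop: repeatedly claim a region and iterate over the marked
  // objects in it via the next mark bitmap (between the local finger
  // and the region limit), partially draining the local queue and the
  // global stack along the way, until we either run out of regions or
  // this step aborts (e.g. due to a timeout or a mark stack overflow).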
2721   do {
2722     if (!has_aborted() && _curr_region != NULL) {
2723       // This means that we're already holding on to a region.
2724       assert(_finger != NULL, "if region is not NULL, then the finger "
2725              "should not be NULL either");
2726 
2727       // We might have restarted this task after an evacuation pause
2728       // which might have evacuated the region we're holding on to
2729       // underneath our feet. Let's read its limit again to make sure
2730       // that we do not iterate over a region of the heap that
2731       // contains garbage (update_region_limit() will also move
2732       // _finger to the start of the region if it is found empty).
2733       update_region_limit();
2734       // We will start from _finger not from the start of the region,
2735       // as we might be restarting this task after aborting half-way
2736       // through scanning this region. In this case, _finger points to
2737       // the address where we last found a marked object. If this is a
2738       // fresh region, _finger points to start().
2739       MemRegion mr = MemRegion(_finger, _region_limit);
2740 
2741       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2742              "humongous regions should go around loop once only");
2743 
2744       // Some special cases:
2745       // If the memory region is empty, we can just give up the region.
2746       // If the current region is humongous then we only need to check
2747       // the bitmap for the bit associated with the start of the object,
2748       // scan the object if it's live, and give up the region.
2749       // Otherwise, let's iterate over the bitmap of the part of the region
2750       // that is left.
2751       // If the iteration is successful, give up the region.
2752       if (mr.is_empty()) {
2753         giveup_current_region();
2754         regular_clock_call();
2755       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2756         if (_nextMarkBitMap->isMarked(mr.start())) {
2757           // The object is marked - apply the closure
2758           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
2759           bitmap_closure.do_bit(offset);
2760         }
2761         // Even if this task aborted while scanning the humongous object
2762         // we can (and should) give up the current region.
2763         giveup_current_region();
2764         regular_clock_call();
2765       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
2766         giveup_current_region();
2767         regular_clock_call();
2768       } else {
2769         assert(has_aborted(), "currently the only way to do so");
2770         // The only way to abort the bitmap iteration is to return
2771         // false from the do_bit() method. However, inside the
2772         // do_bit() method we move the _finger to point to the
2773         // object currently being looked at. So, if we bail out, we
2774         // have definitely set _finger to something non-null.
2775         assert(_finger != NULL, "invariant");
2776 
2777         // Region iteration was actually aborted. So now _finger
2778         // points to the address of the object we last scanned. If we
2779         // leave it there, when we restart this task, we will rescan
2780         // the object. It is easy to avoid this. We move the finger by
2781         // enough to point to the next possible object header (the
2782         // bitmap knows by how much we need to move it as it knows its
2783         // granularity).
2784         assert(_finger < _region_limit, "invariant");
2785         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
2786         // Check if bitmap iteration was aborted while scanning the last object
2787         if (new_finger >= _region_limit) {
2788           giveup_current_region();
2789         } else {
2790           move_finger_to(new_finger);
2791         }
2792       }
2793     }
2794     // At this point we have either completed iterating over the
2795     // region we were holding on to, or we have aborted.
2796 
2797     // We then partially drain the local queue and the global stack.
2798     // (Do we really need this?)
2799     drain_local_queue(true);
2800     drain_global_stack(true);
2801 
    // Read the note on the claim_region() method about why it might
    // return NULL even though more regions may still be available for
    // claiming, and why we have to check out_of_regions() to determine
    // whether we're done or not.
2806     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2807       // We are going to try to claim a new region. We should have
2808       // given up on the previous one.
2809       // Separated the asserts so that we know which one fires.
2810       assert(_curr_region  == NULL, "invariant");
2811       assert(_finger       == NULL, "invariant");
2812       assert(_region_limit == NULL, "invariant");
2813       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2814       if (claimed_region != NULL) {
2815         // Yes, we managed to claim one
2816         setup_for_region(claimed_region);
2817         assert(_curr_region == claimed_region, "invariant");
2818       }
2819       // It is important to call the regular clock here. It might take
2820       // a while to claim a region if, for example, we hit a large
2821       // block of empty regions. So we need to call the regular clock
2822       // method once round the loop to make sure it's called
2823       // frequently enough.
2824       regular_clock_call();
2825     }
2826 
2827     if (!has_aborted() && _curr_region == NULL) {
2828       assert(_cm->out_of_regions(),
2829              "at this point we should be out of regions");
2830     }
  } while (_curr_region != NULL && !has_aborted());
2832 
2833   if (!has_aborted()) {
2834     // We cannot check whether the global stack is empty, since other
2835     // tasks might be pushing objects to it concurrently.
2836     assert(_cm->out_of_regions(),
2837            "at this point we should be out of regions");
2838     // Try to reduce the number of available SATB buffers so that
2839     // remark has less work to do.
2840     drain_satb_buffers();
2841   }
2842 
2843   // Since we've done everything else, we can now totally drain the
2844   // local queue and global stack.
2845   drain_local_queue(false);
2846   drain_global_stack(false);
2847 
  // Attempt to steal work from other tasks' queues.
2849   if (do_stealing && !has_aborted()) {
2850     // We have not aborted. This means that we have finished all that
2851     // we could. Let's try to do some stealing...
2852 
2853     // We cannot check whether the global stack is empty, since other
2854     // tasks might be pushing objects to it concurrently.
2855     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2856            "only way to reach here");
2857     while (!has_aborted()) {
2858       G1TaskQueueEntry entry;
2859       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2860         scan_task_entry(entry);
2861 
2862         // And since we're towards the end, let's totally drain the
2863         // local queue and global stack.
2864         drain_local_queue(false);
2865         drain_global_stack(false);
2866       } else {
2867         break;
2868       }
2869     }
2870   }
2871 
  // If we still haven't aborted by this point, try to enter the
  // termination protocol.
2874   if (do_termination && !has_aborted()) {
2875     // We cannot check whether the global stack is empty, since other
2876     // tasks might be concurrently pushing objects on it.
2877     // Separated the asserts so that we know which one fires.
2878     assert(_cm->out_of_regions(), "only way to reach here");
2879     assert(_task_queue->size() == 0, "only way to reach here");
2880     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2881 
    // The G1CMTask class also extends the TerminatorTerminator class,
    // so its should_exit_termination() method will also be used to
    // decide whether to exit the termination protocol.
2885     bool finished = (is_serial ||
2886                      _cm->terminator()->offer_termination(this));
2887     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2888     _termination_time_ms +=
2889       termination_end_time_ms - _termination_start_time_ms;
2890 
2891     if (finished) {
2892       // We're all done.
2893 
2894       if (_worker_id == 0) {
2895         // let's allow task 0 to do this
2896         if (concurrent()) {
2897           assert(_cm->concurrent_marking_in_progress(), "invariant");
2898           // we need to set this to false before the next
2899           // safepoint. This way we ensure that the marking phase
2900           // doesn't observe any more heap expansions.
2901           _cm->clear_concurrent_marking_in_progress();
2902         }
2903       }
2904 
2905       // We can now guarantee that the global stack is empty, since
2906       // all other tasks have finished. We separated the guarantees so
2907       // that, if a condition is false, we can immediately find out
2908       // which one.
2909       guarantee(_cm->out_of_regions(), "only way to reach here");
2910       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2911       guarantee(_task_queue->size() == 0, "only way to reach here");
2912       guarantee(!_cm->has_overflown(), "only way to reach here");
2913     } else {
      // Apparently there's more work to do. Let's abort this task;
      // the caller will restart it and we can hopefully find more
      // things to do.
2916       set_has_aborted();
2917     }
2918   }
2919 
  // Mainly for debugging purposes, to make sure that a pointer to the
  // closure that was stack-allocated in this frame doesn't escape it
  // by accident.
2923   set_cm_oop_closure(NULL);
2924   double end_time_ms = os::elapsedVTime() * 1000.0;
2925   double elapsed_time_ms = end_time_ms - _start_time_ms;
2926   // Update the step history.
2927   _step_times_ms.add(elapsed_time_ms);
2928 
2929   if (has_aborted()) {
2930     // The task was aborted for some reason.
2931     if (_has_timed_out) {
2932       double diff_ms = elapsed_time_ms - _time_target_ms;
2933       // Keep statistics of how well we did with respect to hitting
2934       // our target only if we actually timed out (if we aborted for
2935       // other reasons, then the results might get skewed).
2936       _marking_step_diffs_ms.add(diff_ms);
2937     }
2938 
2939     if (_cm->has_overflown()) {
2940       // This is the interesting one. We aborted because a global
2941       // overflow was raised. This means we have to restart the
2942       // marking phase and start iterating over regions. However, in
2943       // order to do this we have to make sure that all tasks stop
2944       // what they are doing and re-initialize in a safe manner. We
2945       // will achieve this with the use of two barrier sync points.
2946 
2947       if (!is_serial) {
2948         // We only need to enter the sync barrier if being called
2949         // from a parallel context
2950         _cm->enter_first_sync_barrier(_worker_id);
2951 
2952         // When we exit this sync barrier we know that all tasks have
2953         // stopped doing marking work. So, it's now safe to
2954         // re-initialize our data structures. At the end of this method,
2955         // task 0 will clear the global data structures.
2956       }
2957 
2958       // We clear the local state of this task...
2959       clear_region_fields();
2960 
2961       if (!is_serial) {
2962         // ...and enter the second barrier.
2963         _cm->enter_second_sync_barrier(_worker_id);
2964       }
      // At this point, if we're in the concurrent phase of marking,
      // everything has been re-initialized and we're ready to restart.
2968     }
2969   }
2970 
2971   _claimed = false;
2972 }
2973 
2974 G1CMTask::G1CMTask(uint worker_id,
2975                    G1ConcurrentMark* cm,
2976                    G1CMTaskQueue* task_queue,
2977                    G1CMTaskQueueSet* task_queues)
2978   : _g1h(G1CollectedHeap::heap()),
2979     _worker_id(worker_id), _cm(cm),
2980     _objArray_processor(this),
2981     _claimed(false),
2982     _nextMarkBitMap(NULL), _hash_seed(17),
2983     _task_queue(task_queue),
2984     _task_queues(task_queues),
2985     _cm_oop_closure(NULL) {
2986   guarantee(task_queue != NULL, "invariant");
2987   guarantee(task_queues != NULL, "invariant");
2988 
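  // Seed the step time difference sequence with a small value so that
  // the predictor has at least one sample to work with before any
  // real marking steps have been timed.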
2989   _marking_step_diffs_ms.add(0.5);
2990 }
2991 
// These formatting macros are used below to ensure consistent
// output. The *_H_* versions are used to format the header for a
// particular value and should be kept consistent with the
// corresponding value macro. Also note that most of the macros add
// the necessary whitespace (as a prefix), which makes them a bit
// easier to compose.
2998 
2999 // All the output lines are prefixed with this string to be able to
3000 // identify them easily in a large log file.
3001 #define G1PPRL_LINE_PREFIX            "###"
3002 
3003 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3004 #ifdef _LP64
3005 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3006 #else // _LP64
3007 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3008 #endif // _LP64
3009 
3010 // For per-region info
3011 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3012 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3013 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3014 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3015 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3016 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3017 
3018 // For summary info
3019 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3020 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3021 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3022 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
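
// As a rough illustration (the phase name and all values below are
// made up, and the columns are abbreviated), the trace output built
// from these macros looks roughly like:
//
//   ### PHASE Post-Marking @ 12.345
//   ### HEAP  reserved: 0x...-0x...  region-size: 1048576
//   ###
//   ###  type  address-range  used  prev-live  next-live  gc-eff  remset  code-roots
//   ###  OLD   0x...-0x...    1048576  1040384  917504  1234.5  4096  0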
3023 
3024 G1PrintRegionLivenessInfoClosure::
3025 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3026   : _total_used_bytes(0), _total_capacity_bytes(0),
3027     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3028     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3029   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3030   MemRegion g1_reserved = g1h->g1_reserved();
3031   double now = os::elapsedTime();
3032 
3033   // Print the header of the output.
3034   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3035   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3036                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3037                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3038                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3039                           HeapRegion::GrainBytes);
3040   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3041   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3042                           G1PPRL_TYPE_H_FORMAT
3043                           G1PPRL_ADDR_BASE_H_FORMAT
3044                           G1PPRL_BYTE_H_FORMAT
3045                           G1PPRL_BYTE_H_FORMAT
3046                           G1PPRL_BYTE_H_FORMAT
3047                           G1PPRL_DOUBLE_H_FORMAT
3048                           G1PPRL_BYTE_H_FORMAT
3049                           G1PPRL_BYTE_H_FORMAT,
3050                           "type", "address-range",
3051                           "used", "prev-live", "next-live", "gc-eff",
3052                           "remset", "code-roots");
3053   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3054                           G1PPRL_TYPE_H_FORMAT
3055                           G1PPRL_ADDR_BASE_H_FORMAT
3056                           G1PPRL_BYTE_H_FORMAT
3057                           G1PPRL_BYTE_H_FORMAT
3058                           G1PPRL_BYTE_H_FORMAT
3059                           G1PPRL_DOUBLE_H_FORMAT
3060                           G1PPRL_BYTE_H_FORMAT
3061                           G1PPRL_BYTE_H_FORMAT,
3062                           "", "",
3063                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3064                           "(bytes)", "(bytes)");
3065 }
3066 
3067 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3068   const char* type       = r->get_type_str();
3069   HeapWord* bottom       = r->bottom();
3070   HeapWord* end          = r->end();
3071   size_t capacity_bytes  = r->capacity();
3072   size_t used_bytes      = r->used();
3073   size_t prev_live_bytes = r->live_bytes();
3074   size_t next_live_bytes = r->next_live_bytes();
3075   double gc_eff          = r->gc_efficiency();
3076   size_t remset_bytes    = r->rem_set()->mem_size();
3077   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3078 
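  // Accumulate this region's figures into the totals reported in the
  // summary printed by the destructor.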
3079   _total_used_bytes      += used_bytes;
3080   _total_capacity_bytes  += capacity_bytes;
3081   _total_prev_live_bytes += prev_live_bytes;
3082   _total_next_live_bytes += next_live_bytes;
3083   _total_remset_bytes    += remset_bytes;
3084   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3085 
3086   // Print a line for this particular region.
3087   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3088                           G1PPRL_TYPE_FORMAT
3089                           G1PPRL_ADDR_BASE_FORMAT
3090                           G1PPRL_BYTE_FORMAT
3091                           G1PPRL_BYTE_FORMAT
3092                           G1PPRL_BYTE_FORMAT
3093                           G1PPRL_DOUBLE_FORMAT
3094                           G1PPRL_BYTE_FORMAT
3095                           G1PPRL_BYTE_FORMAT,
3096                           type, p2i(bottom), p2i(end),
3097                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3098                           remset_bytes, strong_code_roots_bytes);
3099 
3100   return false;
3101 }
3102 
3103 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add the static memory usage to the remembered set sizes.
3105   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3106   // Print the footer of the output.
3107   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3108   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3109                          " SUMMARY"
3110                          G1PPRL_SUM_MB_FORMAT("capacity")
3111                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3112                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3113                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3114                          G1PPRL_SUM_MB_FORMAT("remset")
3115                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3116                          bytes_to_mb(_total_capacity_bytes),
3117                          bytes_to_mb(_total_used_bytes),
3118                          perc(_total_used_bytes, _total_capacity_bytes),
3119                          bytes_to_mb(_total_prev_live_bytes),
3120                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3121                          bytes_to_mb(_total_next_live_bytes),
3122                          perc(_total_next_live_bytes, _total_capacity_bytes),
3123                          bytes_to_mb(_total_remset_bytes),
3124                          bytes_to_mb(_total_strong_code_roots_bytes));
3125 }