/*
 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1CardLiveData.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
#include "utilities/growableArray.hpp"

// Concurrent marking bit map wrapper

G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
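  // Each bit of this bitmap covers (1 << _shifter) heap words, so marks can
  // only appear at such boundaries; rounding up avoids reporting a bit that
  // belongs to an object which starts before addr.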
  addr = align_up(addr, HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  assert(limit != NULL, "limit must not be NULL");
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

#ifndef PRODUCT
bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t G1CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t G1CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm = BitMapView((BitMap::bm_word_t*) storage->reserved().start(), _bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clear_range(mr);
}

void G1CMBitMap::clear_range(MemRegion mr) {
  // Clip the range to the area the bitmap actually covers. (The original
  // discarded the result of intersection(), making the call a no-op.)
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
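  // The stack is guaranteed to be empty at this point (asserted on entry
  // above), so no entries can be lost by releasing the old backing memory
  // before installing the new mapping.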
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();

  return true;
}

size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}

bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}

void G1CMMarkStack::expand() {
  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                    old_capacity, new_capacity);
  }
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }
}

void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
  elem->next = *list;
  *list = elem;
}

void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
  TaskQueueEntryChunk* result = *list;
  if (result != NULL) {
    *list = (*list)->next;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != NULL) {
    _chunks_in_chunk_list--;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}
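
// Chunk lifecycle in brief: par_push_chunk() first tries the free list and
// only then bump-allocates a fresh chunk from the mapped backing array via
// _hwm; par_pop_chunk() returns drained chunks to the free list so they are
// reused without ever touching _hwm again.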
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
  // Furthermore, this limits _hwm to a value of _chunk_capacity + #threads, avoiding
  // wraparound of _hwm.
  if (_hwm >= _chunk_capacity) {
    return NULL;
  }

  size_t cur_idx = Atomic::add(1, &_hwm) - 1;
  if (cur_idx >= _chunk_capacity) {
    return NULL;
  }

  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
  result->next = NULL;
  return result;
}

bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == NULL) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}

G1CMRootRegions::G1CMRootRegions() :
  _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}

void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}
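
// Root region scanning is a handshake between the concurrent marking
// threads and evacuation pauses: claim_next() hands out survivor regions
// through an atomic index, and a pause that would otherwise overlap blocks
// in wait_until_scan_finished() below until the scan completes or aborts.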
void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_list("Cleanup List"),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _global_mark_stack(),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
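
  // The two bitmaps trade roles from cycle to cycle: "prev" holds the
  // result of the last completed marking, while "next" is the one being
  // built; swapMarkBitMaps() in cleanup() exchanges them once the new
  // information is complete.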

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / os::initial_active_processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
    double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor          = sleep_factor;
    _marking_task_overhead = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor          = 0.0;
    _marking_task_overhead = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  _parallel_workers = new WorkGang("G1 Marker",
                                   _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }
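
  // Illustrative sizing example (hypothetical numbers, not platform
  // defaults): with 4 marking threads and a per-task queue capacity of
  // TASKQUEUE_SIZE entries, the ergonomic floor computed below is
  // 4 * TASKQUEUE_SIZE entries, clamped from above by MarkStackSizeMax.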
  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}
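
// If the global mark stack overflowed during the cycle, marking has to be
// restarted from scratch; the reset below empties every queue and, on
// overflow, also grows the stack (up to MarkStackSizeMax) so that the retry
// is less likely to hit the same limit.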
void G1ConcurrentMark::reset_marking_state() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();
  }

  clear_has_overflown();
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void G1ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

G1ConcurrentMark::~G1ConcurrentMark() {
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
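
// Clearing a whole mark bitmap is a lot of work, so it is split into
// fixed-size chunks that a work gang claims region by region; when run
// concurrently the workers join the suspendible thread set so that a
// pending safepoint can interrupt the clearing between chunks.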
class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    // Initializer order follows declaration order to avoid -Wreorder.
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
    }

    virtual bool doHeapRegion(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
  }

  bool is_complete() {
    return _cl.complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");

  clear_bitmap(_nextMarkBitMap, _parallel_workers, true);

  // Clear the live count data. If the marking has been aborted, the abort()
  // call already did that.
  if (!has_aborted()) {
    clear_live_data(_parallel_workers);
    DEBUG_ONLY(verify_live_data_clear());
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
  clear_bitmap((G1CMBitMap*)_prevMarkBitMap, workers, false);
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
  bool _error;
public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1Policy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void G1ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}
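
// A note on the SATB activation above (a summary of the pre-barrier
// contract, not new behavior): once the queues are active, mutator threads
// log the previous value of every reference field they overwrite, which is
// what lets concurrent marking treat the object graph as a snapshot taken
// at the beginning of the cycle.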
841 * 842 * Because the thread that does the sync barrier has left the STS, it 843 * is possible to be suspended for a Full GC or an evacuation pause 844 * could occur. This is actually safe, since the entering the sync 845 * barrier is one of the last things do_marking_step() does, and it 846 * doesn't manipulate any data structures afterwards. 847 */ 848 849 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) { 850 bool barrier_aborted; 851 { 852 SuspendibleThreadSetLeaver sts_leave(concurrent()); 853 barrier_aborted = !_first_overflow_barrier_sync.enter(); 854 } 855 856 // at this point everyone should have synced up and not be doing any 857 // more work 858 859 if (barrier_aborted) { 860 // If the barrier aborted we ignore the overflow condition and 861 // just abort the whole marking phase as quickly as possible. 862 return; 863 } 864 865 // If we're executing the concurrent phase of marking, reset the marking 866 // state; otherwise the marking state is reset after reference processing, 867 // during the remark pause. 868 // If we reset here as a result of an overflow during the remark we will 869 // see assertion failures from any subsequent set_concurrency_and_phase() 870 // calls. 871 if (concurrent()) { 872 // let the task associated with with worker 0 do this 873 if (worker_id == 0) { 874 // task 0 is responsible for clearing the global data structures 875 // We should be here because of an overflow. During STW we should 876 // not clear the overflow flag since we rely on it being true when 877 // we exit this method to abort the pause and restart concurrent 878 // marking. 879 reset_marking_state(); 880 881 log_info(gc, marking)("Concurrent Mark reset for overflow"); 882 } 883 } 884 885 // after this, each task should reset its own data structures then 886 // then go into the second barrier 887 } 888 889 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) { 890 SuspendibleThreadSetLeaver sts_leave(concurrent()); 891 _second_overflow_barrier_sync.enter(); 892 893 // at this point everything should be re-initialized and ready to go 894 } 895 896 class G1CMConcurrentMarkingTask: public AbstractGangTask { 897 private: 898 G1ConcurrentMark* _cm; 899 ConcurrentMarkThread* _cmt; 900 901 public: 902 void work(uint worker_id) { 903 assert(Thread::current()->is_ConcurrentGC_thread(), 904 "this should only be done by a conc GC thread"); 905 ResourceMark rm; 906 907 double start_vtime = os::elapsedVTime(); 908 909 { 910 SuspendibleThreadSetJoiner sts_join; 911 912 assert(worker_id < _cm->active_tasks(), "invariant"); 913 G1CMTask* the_task = _cm->task(worker_id); 914 the_task->record_start_time(); 915 if (!_cm->has_aborted()) { 916 do { 917 double start_vtime_sec = os::elapsedVTime(); 918 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 919 920 the_task->do_marking_step(mark_step_duration_ms, 921 true /* do_termination */, 922 false /* is_serial*/); 923 924 double end_vtime_sec = os::elapsedVTime(); 925 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 926 _cm->do_yield_check(); 927 928 jlong sleep_time_ms; 929 if (!_cm->has_aborted() && the_task->has_aborted()) { 930 sleep_time_ms = 931 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 932 { 933 SuspendibleThreadSetLeaver sts_leave; 934 os::sleep(Thread::current(), sleep_time_ms, false); 935 } 936 } 937 } while (!_cm->has_aborted() && the_task->has_aborted()); 938 } 939 the_task->record_end_time(); 940 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 941 } 942 

class G1CMConcurrentMarkingTask: public AbstractGangTask {
private:
  G1ConcurrentMark*     _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      G1CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->do_yield_check();

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
                            ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~G1CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint G1ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
                                                      1, /* Minimum workers */
                                                      parallel_marking_threads(),
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
         "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
         max_parallel_marking_threads(), n_conc_workers);
  return n_conc_workers;
}

void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
private:
  G1ConcurrentMark* _cm;

public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr);
      hr = root_regions->claim_next();
    }
  }
};
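
// Survivor regions filled by the initial-mark pause are treated as
// implicitly live, so every reference out of them must be traced before the
// next evacuation pause can move objects; this is why the scan below runs
// to completion (or is explicitly aborted) before any further pause.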
void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
                                     // We distribute work on a per-region basis, so starting
                                     // more threads than that is useless.
                                     root_regions()->num_root_regions());
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
    _parallel_workers->run_task(&task, _parallel_marking_threads);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _parallel_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->run_task(&markingTask);
  print_stats();
}
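
// The remark pause below finishes what concurrent marking started: it
// drains the remaining SATB buffers and task queues with the world stopped,
// processes the discovered references, and either completes the cycle or,
// if the global mark stack overflowed, schedules a concurrent restart.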
void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Remark Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  _gc_tracer_cm->report_object_count_after_gc(&is_alive);
}
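
// The closure below does the per-region accounting at the end of a cycle:
// any non-young, non-archive region with no live bytes at all can be freed
// wholesale, while the remaining regions have their remembered sets
// prepared for the cleanup work that follows.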
class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  uint _old_regions_removed;
  uint _humongous_regions_removed;
  HRRSCleanupTask* _hrrs_cleanup_task;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             FreeRegionList* local_cleanup_list,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1),
    _freed_bytes(0),
    _local_cleanup_list(local_cleanup_list),
    _old_regions_removed(0),
    _humongous_regions_removed(0),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }
  const uint old_regions_removed() { return _old_regions_removed; }
  const uint humongous_regions_removed() { return _humongous_regions_removed; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->is_archive()) {
      return false;
    }
    _g1->reset_gc_time_stamps(hr);
    hr->note_end_of_marking();

    if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
      _freed_bytes += hr->used();
      hr->set_containing_set(NULL);
      if (hr->is_humongous()) {
        _humongous_regions_removed++;
        _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
      } else {
        _old_regions_removed++;
        _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
      }
    } else {
      hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
    }

    return false;
  }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
      AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                           &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        FreeRegionListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};
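
// Cleanup is the second pause of the cycle: it finalizes the per-region
// liveness counts, installs the freshly built bitmap as "prev", frees
// wholly dead regions via the note-end task above, and leaves the freed
// regions for the concurrent complete_cleanup() phase to clear.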
void G1ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  g1h->verifier()->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->verifier()->check_bitmaps("Cleanup Start");

  G1Policy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  {
    // Name the timer so it measures the whole block rather than being an
    // unnamed temporary that is destroyed immediately.
    GCTraceTime(Debug, gc) trace("Finalize Live Data");
    finalize_live_data();
  }

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc) trace("Verify Live Data");
    verify_live_data();
  }

  g1h->collector_state()->set_mark_in_progress(false);

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  uint n_workers = _g1h->workers()->active_workers();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
  g1h->workers()->run_task(&g1_par_note_end_task);
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // Remembered set scrubbing must happen before the
  // record_concurrent_mark_cleanup_end() call below, since it affects the
  // metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    g1h->scrub_rem_set();
    _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end();

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
  }

  g1h->verifier()->check_bitmaps("Cleanup End");

  g1h->verifier()->verify_region_sets_optional();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();
  g1h->allocation_context_stats().update_after_mark();
}

void G1ConcurrentMark::complete_cleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                  "cleanup list has %u entries",
                                  _cleanup_list.length());

  // No one else should be accessing the _cleanup_list at this point,
  // so it is not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
    assert(hr != NULL, "Got NULL from a non-empty list");
    hr->par_clear();
    tmp_free_list.add_ordered(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
                                      "appending %u entries to the secondary_free_list, "
                                      "cleanup list still has %u entries",
                                      tmp_free_list.length(),
                                      _cleanup_list.length());

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }
#ifndef PRODUCT
      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
#endif
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference
// processing. Uses the G1CMTask associated with a worker thread (for serial
// reference processing the G1CMTask for worker 0 is used) to preserve
// (mark) and trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// state. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  int               _ref_counter_limit;
  int               _ref_counter;
  bool              _is_serial;
public:
  // Initializer order follows declaration order to avoid -Wreorder.
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _is_serial(is_serial) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false      /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};
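
// How these closures are consumed, in outline (hedged: the exact
// ReferenceProcessor entry points live in referenceProcessor.hpp and are
// not shown here): the processor is handed the is-alive closure to test
// referents, the keep-alive closure to mark the ones that must survive,
// and the drain closure to flush the marking work those calls generate.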
we've completely drained the
1535 // entries that were pushed as a result of applying the 'keep alive'
1536 // closure to the entries on the discovered ref lists) or we overflow
1537 // the global marking stack.
1538 //
1539 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1540 // flag while there may still be some work to do. (See the comment at
1541 // the beginning of G1CMTask::do_marking_step() for those conditions -
1542 // one of which is reaching the specified time target.) It is only
1543 // when G1CMTask::do_marking_step() returns without setting the
1544 // has_aborted() flag that the marking step has completed.
1545
1546 _task->do_marking_step(1000000000.0 /* something very large */,
1547 true /* do_termination */,
1548 _is_serial);
1549 } while (_task->has_aborted() && !_cm->has_overflown());
1550 }
1551 };
1552
1553 // Implementation of AbstractRefProcTaskExecutor for parallel
1554 // reference processing at the end of G1 concurrent marking
1555
1556 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1557 private:
1558 G1CollectedHeap* _g1h;
1559 G1ConcurrentMark* _cm;
1560 WorkGang* _workers;
1561 uint _active_workers;
1562
1563 public:
1564 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1565 G1ConcurrentMark* cm,
1566 WorkGang* workers,
1567 uint n_workers) :
1568 _g1h(g1h), _cm(cm),
1569 _workers(workers), _active_workers(n_workers) { }
1570
1571 // Executes the given task using concurrent marking worker threads.
1572 virtual void execute(ProcessTask& task);
1573 virtual void execute(EnqueueTask& task);
1574 };
1575
1576 class G1CMRefProcTaskProxy: public AbstractGangTask {
1577 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1578 ProcessTask& _proc_task;
1579 G1CollectedHeap* _g1h;
1580 G1ConcurrentMark* _cm;
1581
1582 public:
1583 G1CMRefProcTaskProxy(ProcessTask& proc_task,
1584 G1CollectedHeap* g1h,
1585 G1ConcurrentMark* cm) :
1586 AbstractGangTask("Process reference objects in parallel"),
1587 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1588 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1589 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1590 }
1591
1592 virtual void work(uint worker_id) {
1593 ResourceMark rm;
1594 HandleMark hm;
1595 G1CMTask* task = _cm->task(worker_id);
1596 G1CMIsAliveClosure g1_is_alive(_g1h);
1597 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1598 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1599
1600 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1601 }
1602 };
1603
1604 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1605 assert(_workers != NULL, "Need parallel worker threads.");
1606 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1607
1608 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1609
1610 // We need to reset the concurrency level before each
1611 // proxy task execution, so that the termination protocol
1612 // and overflow handling in G1CMTask::do_marking_step() know
1613 // how many workers to wait for.
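// (Explanatory note: set_concurrency() sizes the termination protocol
// and the two overflow barrier syncs to the given number of tasks. If
// it were left at the value set for the previous phase, the barriers
// could wait for the wrong number of parties.)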
1614 _cm->set_concurrency(_active_workers);
1615 _workers->run_task(&proc_task_proxy);
1616 }
1617
1618 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1619 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1620 EnqueueTask& _enq_task;
1621
1622 public:
1623 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1624 AbstractGangTask("Enqueue reference objects in parallel"),
1625 _enq_task(enq_task) { }
1626
1627 virtual void work(uint worker_id) {
1628 _enq_task.work(worker_id);
1629 }
1630 };
1631
1632 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1633 assert(_workers != NULL, "Need parallel worker threads.");
1634 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1635
1636 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1637
1638 // Not strictly necessary but...
1639 //
1640 // We need to reset the concurrency level before each
1641 // proxy task execution, so that the termination protocol
1642 // and overflow handling in G1CMTask::do_marking_step() know
1643 // how many workers to wait for.
1644 _cm->set_concurrency(_active_workers);
1645 _workers->run_task(&enq_task_proxy);
1646 }
1647
1648 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1649 if (has_overflown()) {
1650 // Skip processing the discovered references if we have
1651 // overflown the global marking stack. Reference objects
1652 // only get discovered once so it is OK not to
1653 // de-populate the discovered reference lists. We could have,
1654 // but the only benefit would be that, when marking restarts,
1655 // fewer reference objects are discovered.
1656 return;
1657 }
1658
1659 ResourceMark rm;
1660 HandleMark hm;
1661
1662 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1663
1664 // Is alive closure.
1665 G1CMIsAliveClosure g1_is_alive(g1h);
1666
1667 // Inner scope to exclude the cleaning of the string and symbol
1668 // tables from the displayed time.
1669 {
1670 GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1671
1672 ReferenceProcessor* rp = g1h->ref_processor_cm();
1673
1674 // See the comment in G1CollectedHeap::ref_processing_init()
1675 // about how reference processing currently works in G1.
1676
1677 // Set the soft reference policy
1678 rp->setup_policy(clear_all_soft_refs);
1679 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1680
1681 // Instances of the 'Keep Alive' and 'Complete GC' closures used
1682 // in serial reference processing. Note these closures are also
1683 // used for serially processing (by the current thread) the
1684 // JNI references during parallel reference processing.
1685 //
1686 // These closures do not need to synchronize with the worker
1687 // threads involved in parallel reference processing as these
1688 // instances are executed serially by the current thread (e.g.
1689 // reference processing is not multi-threaded and is thus
1690 // performed by the current thread instead of a gang worker).
1691 //
1692 // The gang tasks involved in parallel reference processing create
1693 // their own instances of these closures, which do their own
1694 // synchronization among themselves.
1695 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1696 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1697
1698 // We need at least one active thread.
If reference processing
1699 // is not multi-threaded we use the current (VMThread) thread,
1700 // otherwise we use the work gang from the G1CollectedHeap and
1701 // we utilize all the worker threads we can.
1702 bool processing_is_mt = rp->processing_is_mt();
1703 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
1704 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
1705
1706 // Parallel processing task executor.
1707 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
1708 g1h->workers(), active_workers);
1709 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1710
1711 // Set the concurrency level. The phase was already set prior to
1712 // executing the remark task.
1713 set_concurrency(active_workers);
1714
1715 // Set the degree of MT processing here. If the discovery was done MT,
1716 // the number of threads involved during discovery could differ from
1717 // the number of active workers. This is OK as long as the discovered
1718 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1719 rp->set_active_mt_degree(active_workers);
1720
1721 // Process the weak references.
1722 const ReferenceProcessorStats& stats =
1723 rp->process_discovered_references(&g1_is_alive,
1724 &g1_keep_alive,
1725 &g1_drain_mark_stack,
1726 executor,
1727 _gc_timer_cm);
1728 _gc_tracer_cm->report_gc_reference_stats(stats);
1729
1730 // The do_oop work routines of the keep_alive and drain_marking_stack
1731 // oop closures will set the has_overflown flag if we overflow the
1732 // global marking stack.
1733
1734 assert(has_overflown() || _global_mark_stack.is_empty(),
1735 "Mark stack should be empty (unless it has overflown)");
1736
1737 assert(rp->num_q() == active_workers, "why not");
1738
1739 rp->enqueue_discovered_references(executor);
1740
1741 rp->verify_no_references_recorded();
1742 assert(!rp->discovery_enabled(), "Post condition");
1743 }
1744
1745 if (has_overflown()) {
1746 // We cannot trust g1_is_alive if the marking stack overflowed
1747 return;
1748 }
1749
1750 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1751
1752 // Unload Klasses, String, Symbols, Code Cache, etc.
1753 if (ClassUnloadingWithConcurrentMark) {
1754 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1755 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1756 g1h->complete_cleaning(&g1_is_alive, purged_classes);
1757 } else {
1758 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1759 // No need to clean string table and symbol table as they are treated as strong roots when
1760 // class unloading is disabled.
1761 g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1762
1763 }
1764 }
1765
1766 void G1ConcurrentMark::swapMarkBitMaps() {
1767 G1CMBitMapRO* temp = _prevMarkBitMap;
1768 _prevMarkBitMap = (G1CMBitMapRO*)_nextMarkBitMap;
1769 _nextMarkBitMap = (G1CMBitMap*) temp;
1770 }
1771
1772 // Closure for marking entries in SATB buffers.
1773 class G1CMSATBBufferClosure : public SATBBufferClosure {
1774 private:
1775 G1CMTask* _task;
1776 G1CollectedHeap* _g1h;
1777
1778 // This is very similar to G1CMTask::deal_with_reference, but with
1779 // more relaxed requirements for the argument, so this must be more
1780 // circumspect about treating the argument as an object.
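// (Explanatory note: a "stale" entry can arise when the referenced
// memory was freed and possibly reused while marking was in progress -
// for example after a humongous region is eagerly reclaimed by an
// evacuation pause - so a logged pointer may no longer refer to a
// parsable object. The next_top_at_mark_start() check below filters
// these out: entries at or above NTAMS are ignored, both because such
// objects are implicitly live and because only entries below NTAMS are
// guaranteed to be valid objects.)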
1781 void do_entry(void* entry) const {
1782 _task->increment_refs_reached();
1783 HeapRegion* hr = _g1h->heap_region_containing(entry);
1784 if (entry < hr->next_top_at_mark_start()) {
1785 // Until we get here, we don't know whether entry refers to a valid
1786 // object; it could instead have been a stale reference.
1787 oop obj = static_cast<oop>(entry);
1788 assert(obj->is_oop(true /* ignore mark word */),
1789 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
1790 _task->make_reference_grey(obj);
1791 }
1792 }
1793
1794 public:
1795 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1796 : _task(task), _g1h(g1h) { }
1797
1798 virtual void do_buffer(void** buffer, size_t size) {
1799 for (size_t i = 0; i < size; ++i) {
1800 do_entry(buffer[i]);
1801 }
1802 }
1803 };
1804
1805 class G1RemarkThreadsClosure : public ThreadClosure {
1806 G1CMSATBBufferClosure _cm_satb_cl;
1807 G1CMOopClosure _cm_cl;
1808 MarkingCodeBlobClosure _code_cl;
1809 int _thread_parity;
1810
1811 public:
1812 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1813 _cm_satb_cl(task, g1h),
1814 _cm_cl(g1h, g1h->concurrent_mark(), task),
1815 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1816 _thread_parity(Threads::thread_claim_parity()) {}
1817
1818 void do_thread(Thread* thread) {
1819 if (thread->is_Java_thread()) {
1820 if (thread->claim_oops_do(true, _thread_parity)) {
1821 JavaThread* jt = (JavaThread*)thread;
1822
1823 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1824 // however, oops reachable from nmethods have very complex lifecycles:
1825 // * Alive if on the stack of an executing method
1826 // * Weakly reachable otherwise
1827 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1828 // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1829 jt->nmethods_do(&_code_cl);
1830
1831 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1832 }
1833 } else if (thread->is_VM_thread()) {
1834 if (thread->claim_oops_do(true, _thread_parity)) {
1835 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1836 }
1837 }
1838 }
1839 };
1840
1841 class G1CMRemarkTask: public AbstractGangTask {
1842 private:
1843 G1ConcurrentMark* _cm;
1844 public:
1845 void work(uint worker_id) {
1846 // Since all available tasks are actually started, we should
1847 // only proceed if we're supposed to be active.
1848 if (worker_id < _cm->active_tasks()) {
1849 G1CMTask* task = _cm->task(worker_id);
1850 task->record_start_time();
1851 {
1852 ResourceMark rm;
1853 HandleMark hm;
1854
1855 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1856 Threads::threads_do(&threads_f);
1857 }
1858
1859 do {
1860 task->do_marking_step(1000000000.0 /* something very large */,
1861 true /* do_termination */,
1862 false /* is_serial */);
1863 } while (task->has_aborted() && !_cm->has_overflown());
1864 // If we overflow, then we do not want to restart. We instead
1865 // want to abort remark and do concurrent marking again.
1866 task->record_end_time();
1867 }
1868 }
1869
1870 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1871 AbstractGangTask("Par Remark"), _cm(cm) {
1872 _cm->terminator()->reset_for_reuse(active_workers);
1873 }
1874 };
1875
1876 void G1ConcurrentMark::checkpointRootsFinalWork() {
1877 ResourceMark rm;
1878 HandleMark hm;
1879 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1880
1881 GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1882
1883 g1h->ensure_parsability(false);
1884
1885 // this is remark, so we'll use up all active threads
1886 uint active_workers = g1h->workers()->active_workers();
1887 set_concurrency_and_phase(active_workers, false /* concurrent */);
1888 // Leave _parallel_marking_threads at its
1889 // value originally calculated in the G1ConcurrentMark
1890 // constructor and pass values of the active workers
1891 // through the gang in the task.
1892
1893 {
1894 StrongRootsScope srs(active_workers);
1895
1896 G1CMRemarkTask remarkTask(this, active_workers);
1897 // We will start all available threads, even if we decide that the
1898 // active_workers will be fewer. The extra ones will just bail out
1899 // immediately.
1900 g1h->workers()->run_task(&remarkTask);
1901 }
1902
1903 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1904 guarantee(has_overflown() ||
1905 satb_mq_set.completed_buffers_num() == 0,
1906 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1907 BOOL_TO_STR(has_overflown()),
1908 satb_mq_set.completed_buffers_num());
1909
1910 print_stats();
1911 }
1912
1913 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
1914 // Note we are overriding the read-only view of the prev map here, via
1915 // the cast.
1916 ((G1CMBitMap*)_prevMarkBitMap)->clear_range(mr);
1917 }
1918
1919 HeapRegion*
1920 G1ConcurrentMark::claim_region(uint worker_id) {
1921 // "checkpoint" the finger
1922 HeapWord* finger = _finger;
1923
1924 // _heap_end will not change underneath our feet; it only changes at
1925 // yield points.
1926 while (finger < _heap_end) {
1927 assert(_g1h->is_in_g1_reserved(finger), "invariant");
1928
1929 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1930 // Make sure that the reads below do not float before loading curr_region.
1931 OrderAccess::loadload();
1932 // Above heap_region_containing may return NULL as we always scan and claim
1933 // until the end of the heap. In this case, just jump to the next region.
1934 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1935
1936 // Is the gap between reading the finger and doing the CAS too long?
1937 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
1938 if (res == finger && curr_region != NULL) {
1939 // we succeeded
1940 HeapWord* bottom = curr_region->bottom();
1941 HeapWord* limit = curr_region->next_top_at_mark_start();
1942
1943 // notice that _finger == end cannot be guaranteed here since
1944 // someone else might have moved the finger even further
1945 assert(_finger >= end, "the finger should have moved forward");
1946
1947 if (limit > bottom) {
1948 return curr_region;
1949 } else {
1950 assert(limit == bottom,
1951 "the region limit should be at bottom");
1952 // we return NULL and the caller should try calling
1953 // claim_region() again.
1954 return NULL; 1955 } 1956 } else { 1957 assert(_finger > finger, "the finger should have moved forward"); 1958 // read it again 1959 finger = _finger; 1960 } 1961 } 1962 1963 return NULL; 1964 } 1965 1966 #ifndef PRODUCT 1967 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 1968 private: 1969 G1CollectedHeap* _g1h; 1970 const char* _phase; 1971 int _info; 1972 1973 public: 1974 VerifyNoCSetOops(const char* phase, int info = -1) : 1975 _g1h(G1CollectedHeap::heap()), 1976 _phase(phase), 1977 _info(info) 1978 { } 1979 1980 void operator()(G1TaskQueueEntry task_entry) const { 1981 if (task_entry.is_array_slice()) { 1982 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1983 return; 1984 } 1985 guarantee(task_entry.obj()->is_oop(), 1986 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1987 p2i(task_entry.obj()), _phase, _info); 1988 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1989 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1990 p2i(task_entry.obj()), _phase, _info); 1991 } 1992 }; 1993 1994 void G1ConcurrentMark::verify_no_cset_oops() { 1995 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1996 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 1997 return; 1998 } 1999 2000 // Verify entries on the global mark stack 2001 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 2002 2003 // Verify entries on the task queues 2004 for (uint i = 0; i < _max_worker_id; ++i) { 2005 G1CMTaskQueue* queue = _task_queues->queue(i); 2006 queue->iterate(VerifyNoCSetOops("Queue", i)); 2007 } 2008 2009 // Verify the global finger 2010 HeapWord* global_finger = finger(); 2011 if (global_finger != NULL && global_finger < _heap_end) { 2012 // Since we always iterate over all regions, we might get a NULL HeapRegion 2013 // here. 2014 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2015 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2016 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2017 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2018 } 2019 2020 // Verify the task fingers 2021 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2022 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2023 G1CMTask* task = _tasks[i]; 2024 HeapWord* task_finger = task->finger(); 2025 if (task_finger != NULL && task_finger < _heap_end) { 2026 // See above note on the global finger verification. 
2027 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2028 guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2029 !task_hr->in_collection_set(),
2030 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2031 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2032 }
2033 }
2034 }
2035 #endif // PRODUCT
2036 void G1ConcurrentMark::create_live_data() {
2037 _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap);
2038 }
2039
2040 void G1ConcurrentMark::finalize_live_data() {
2041 _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap);
2042 }
2043
2044 void G1ConcurrentMark::verify_live_data() {
2045 _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap);
2046 }
2047
2048 void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
2049 _g1h->g1_rem_set()->clear_card_live_data(workers);
2050 }
2051
2052 #ifdef ASSERT
2053 void G1ConcurrentMark::verify_live_data_clear() {
2054 _g1h->g1_rem_set()->verify_card_live_data_is_clear();
2055 }
2056 #endif
2057
2058 void G1ConcurrentMark::print_stats() {
2059 if (!log_is_enabled(Debug, gc, stats)) {
2060 return;
2061 }
2062 log_debug(gc, stats)("---------------------------------------------------------------------");
2063 for (size_t i = 0; i < _active_tasks; ++i) {
2064 _tasks[i]->print_stats();
2065 log_debug(gc, stats)("---------------------------------------------------------------------");
2066 }
2067 }
2068
2069 void G1ConcurrentMark::abort() {
2070 if (!cmThread()->during_cycle() || _has_aborted) {
2071 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2072 return;
2073 }
2074
2075 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2076 // concurrent bitmap clearing.
2077 {
2078 GCTraceTime(Debug, gc)("Clear Next Bitmap");
2079 clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
2080 }
2081 // Note we cannot clear the previous marking bitmap here
2082 // since VerifyDuringGC verifies the objects marked during
2083 // a full GC against the previous bitmap.
2084
2085 {
2086 GCTraceTime(Debug, gc)("Clear Live Data");
2087 clear_live_data(_g1h->workers());
2088 }
2089 DEBUG_ONLY({
2090 GCTraceTime(Debug, gc)("Verify Live Data Clear");
2091 verify_live_data_clear();
2092 })
2093 // Empty mark stack
2094 reset_marking_state();
2095 for (uint i = 0; i < _max_worker_id; ++i) {
2096 _tasks[i]->clear_region_fields();
2097 }
2098 _first_overflow_barrier_sync.abort();
2099 _second_overflow_barrier_sync.abort();
2100 _has_aborted = true;
2101
2102 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2103 satb_mq_set.abandon_partial_marking();
2104 // This can be called either during or outside marking; we'll read
2105 // the expected_active value from the SATB queue set.
2106 satb_mq_set.set_active_all_threads(
2107 false, /* new active value */
2108 satb_mq_set.is_active() /* expected_active */);
2109 }
2110
2111 static void print_ms_time_info(const char* prefix, const char* name,
2112 NumberSeq& ns) {
2113 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2114 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2115 if (ns.num() > 0) {
2116 log_trace(gc, marking)("%s [std.
dev = %8.2f ms, max = %8.2f ms]", 2117 prefix, ns.sd(), ns.maximum()); 2118 } 2119 } 2120 2121 void G1ConcurrentMark::print_summary_info() { 2122 Log(gc, marking) log; 2123 if (!log.is_trace()) { 2124 return; 2125 } 2126 2127 log.trace(" Concurrent marking:"); 2128 print_ms_time_info(" ", "init marks", _init_times); 2129 print_ms_time_info(" ", "remarks", _remark_times); 2130 { 2131 print_ms_time_info(" ", "final marks", _remark_mark_times); 2132 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2133 2134 } 2135 print_ms_time_info(" ", "cleanups", _cleanup_times); 2136 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2137 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2138 if (G1ScrubRemSets) { 2139 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2140 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2141 } 2142 log.trace(" Total stop_world time = %8.2f s.", 2143 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2144 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2145 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2146 } 2147 2148 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2149 _parallel_workers->print_worker_threads_on(st); 2150 } 2151 2152 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2153 _parallel_workers->threads_do(tc); 2154 } 2155 2156 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2157 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2158 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2159 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2160 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2161 } 2162 2163 // Closure for iteration over bitmaps 2164 class G1CMBitMapClosure : public BitMapClosure { 2165 private: 2166 // the bitmap that is being iterated over 2167 G1CMBitMap* _nextMarkBitMap; 2168 G1ConcurrentMark* _cm; 2169 G1CMTask* _task; 2170 2171 public: 2172 G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) : 2173 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2174 2175 bool do_bit(size_t offset) { 2176 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2177 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2178 assert( addr < _cm->finger(), "invariant"); 2179 assert(addr >= _task->finger(), "invariant"); 2180 2181 // We move that task's local finger along. 
2182 _task->move_finger_to(addr);
2183
2184 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
2185 // we only partially drain the local queue and global stack
2186 _task->drain_local_queue(true);
2187 _task->drain_global_stack(true);
2188
2189 // if the has_aborted flag has been raised, we need to bail out of
2190 // the iteration
2191 return !_task->has_aborted();
2192 }
2193 };
2194
2195 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2196 ReferenceProcessor* result = g1h->ref_processor_cm();
2197 assert(result != NULL, "CM reference processor should not be NULL");
2198 return result;
2199 }
2200
2201 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2202 G1ConcurrentMark* cm,
2203 G1CMTask* task)
2204 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2205 _g1h(g1h), _cm(cm), _task(task)
2206 { }
2207
2208 void G1CMTask::setup_for_region(HeapRegion* hr) {
2209 assert(hr != NULL,
2210 "claim_region() should have filtered out NULL regions");
2211 _curr_region = hr;
2212 _finger = hr->bottom();
2213 update_region_limit();
2214 }
2215
2216 void G1CMTask::update_region_limit() {
2217 HeapRegion* hr = _curr_region;
2218 HeapWord* bottom = hr->bottom();
2219 HeapWord* limit = hr->next_top_at_mark_start();
2220
2221 if (limit == bottom) {
2222 // The region was collected underneath our feet.
2223 // We set the finger to bottom to ensure that the bitmap
2224 // iteration that will follow this will not do anything.
2225 // (this is not a condition that holds when we set the region up,
2226 // as the region is not supposed to be empty in the first place)
2227 _finger = bottom;
2228 } else if (limit >= _region_limit) {
2229 assert(limit >= _finger, "peace of mind");
2230 } else {
2231 assert(limit < _region_limit, "only way to get here");
2232 // This can happen under some pretty unusual circumstances. An
2233 // evacuation pause empties the region underneath our feet (NTAMS
2234 // at bottom). We then do some allocation in the region (NTAMS
2235 // stays at bottom), followed by the region being used as a GC
2236 // alloc region (NTAMS will move to top() and the objects
2237 // originally below it will be grayed). All objects now marked in
2238 // the region are explicitly grayed, if below the global finger,
2239 // and in fact we do not need to scan anything else. So, we simply
2240 // set _finger to be limit to ensure that the bitmap iteration
2241 // doesn't do anything.
2242 _finger = limit;
2243 }
2244
2245 _region_limit = limit;
2246 }
2247
2248 void G1CMTask::giveup_current_region() {
2249 assert(_curr_region != NULL, "invariant");
2250 clear_region_fields();
2251 }
2252
2253 void G1CMTask::clear_region_fields() {
2254 // Values for these three fields that indicate that we're not
2255 // holding on to a region.
2256 _curr_region = NULL;
2257 _finger = NULL;
2258 _region_limit = NULL;
2259 }
2260
2261 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2262 if (cm_oop_closure == NULL) {
2263 assert(_cm_oop_closure != NULL, "invariant");
2264 } else {
2265 assert(_cm_oop_closure == NULL, "invariant");
2266 }
2267 _cm_oop_closure = cm_oop_closure;
2268 }
2269
2270 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2271 guarantee(nextMarkBitMap != NULL, "invariant");
2272 _nextMarkBitMap = nextMarkBitMap;
2273 clear_region_fields();
2274
2275 _calls = 0;
2276 _elapsed_time_ms = 0.0;
2277 _termination_time_ms = 0.0;
2278 _termination_start_time_ms = 0.0;
2279 }
2280
2281 bool G1CMTask::should_exit_termination() {
2282 regular_clock_call();
2283 // This is called when we are in the termination protocol. We should
2284 // quit if, for some reason, this task wants to abort or the global
2285 // stack is not empty (this means that we can get work from it).
2286 return !_cm->mark_stack_empty() || has_aborted();
2287 }
2288
2289 void G1CMTask::reached_limit() {
2290 assert(_words_scanned >= _words_scanned_limit ||
2291 _refs_reached >= _refs_reached_limit,
2292 "shouldn't have been called otherwise");
2293 regular_clock_call();
2294 }
2295
2296 void G1CMTask::regular_clock_call() {
2297 if (has_aborted()) return;
2298
2299 // First, we need to recalculate the words scanned and refs reached
2300 // limits for the next clock call.
2301 recalculate_limits();
2302
2303 // During the regular clock call we do the following:
2304
2305 // (1) If an overflow has been flagged, then we abort.
2306 if (_cm->has_overflown()) {
2307 set_has_aborted();
2308 return;
2309 }
2310
2311 // If we are not concurrent (i.e. we're doing remark) we don't need
2312 // to check anything else. The other steps are only needed during
2313 // the concurrent marking phase.
2314 if (!concurrent()) return;
2315
2316 // (2) If marking has been aborted for Full GC, then we also abort.
2317 if (_cm->has_aborted()) {
2318 set_has_aborted();
2319 return;
2320 }
2321
2322 double curr_time_ms = os::elapsedVTime() * 1000.0;
2323
2324 // (3) We check whether we should yield. If we have to, then we abort.
2325 if (SuspendibleThreadSet::should_yield()) {
2326 // We should yield. To do this we abort the task. The caller is
2327 // responsible for yielding.
2328 set_has_aborted();
2329 return;
2330 }
2331
2332 // (4) We check whether we've reached our time quota. If we have,
2333 // then we abort.
2334 double elapsed_time_ms = curr_time_ms - _start_time_ms;
2335 if (elapsed_time_ms > _time_target_ms) {
2336 set_has_aborted();
2337 _has_timed_out = true;
2338 return;
2339 }
2340
2341 // (5) Finally, we check whether there are enough completed SATB
2342 // buffers available for processing. If there are, we abort.
2343 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2344 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2345 // we do need to process SATB buffers, we'll abort and restart
2346 // the marking task to do so
2347 set_has_aborted();
2348 return;
2349 }
2350 }
2351
2352 void G1CMTask::recalculate_limits() {
2353 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2354 _words_scanned_limit = _real_words_scanned_limit;
2355
2356 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2357 _refs_reached_limit = _real_refs_reached_limit;
2358 }
2359
2360 void G1CMTask::decrease_limits() {
2361 // This is called when we believe that we're going to do an infrequent
2362 // operation which will increase the per-byte scanned cost (i.e. move
2363 // entries to/from the global stack). It basically tries to decrease the
2364 // scanning limit so that the clock is called earlier.
2365
2366 _words_scanned_limit = _real_words_scanned_limit -
2367 3 * words_scanned_period / 4;
2368 _refs_reached_limit = _real_refs_reached_limit -
2369 3 * refs_reached_period / 4;
2370 }
2371
2372 void G1CMTask::move_entries_to_global_stack() {
2373 // Local array where we'll store the entries that will be popped
2374 // from the local queue.
2375 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2376
2377 size_t n = 0;
2378 G1TaskQueueEntry task_entry;
2379 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2380 buffer[n] = task_entry;
2381 ++n;
2382 }
2383 if (n < G1CMMarkStack::EntriesPerChunk) {
2384 buffer[n] = G1TaskQueueEntry();
2385 }
2386
2387 if (n > 0) {
2388 if (!_cm->mark_stack_push(buffer)) {
2389 set_has_aborted();
2390 }
2391 }
2392
2393 // This operation was quite expensive, so decrease the limits.
2394 decrease_limits();
2395 }
2396
2397 bool G1CMTask::get_entries_from_global_stack() {
2398 // Local array where we'll store the entries that will be popped
2399 // from the global stack.
2400 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2401
2402 if (!_cm->mark_stack_pop(buffer)) {
2403 return false;
2404 }
2405
2406 // We did actually pop at least one entry.
2407 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2408 G1TaskQueueEntry task_entry = buffer[i];
2409 if (task_entry.is_null()) {
2410 break;
2411 }
2412 assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2413 bool success = _task_queue->push(task_entry);
2414 // We only call this when the local queue is empty or under a
2415 // given target limit. So, we do not expect this push to fail.
2416 assert(success, "invariant");
2417 }
2418
2419 // This operation was quite expensive, so decrease the limits
2420 decrease_limits();
2421 return true;
2422 }
2423
2424 void G1CMTask::drain_local_queue(bool partially) {
2425 if (has_aborted()) {
2426 return;
2427 }
2428
2429 // Decide what the target size is, depending on whether we're going to
2430 // drain it partially (so that other tasks can steal if they run out
2431 // of things to do) or totally (at the very end).
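// For example (illustrative numbers only, not actual defaults): with
// max_elems() == 16384 and GCDrainStackTargetSize == 64, a partial
// drain stops once the queue is down to MIN2(16384 / 3, 64) == 64
// entries, leaving some work available for stealing, whereas a total
// drain uses a target of 0 and empties the queue completely.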
2432 size_t target_size;
2433 if (partially) {
2434 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2435 } else {
2436 target_size = 0;
2437 }
2438
2439 if (_task_queue->size() > target_size) {
2440 G1TaskQueueEntry entry;
2441 bool ret = _task_queue->pop_local(entry);
2442 while (ret) {
2443 scan_task_entry(entry);
2444 if (_task_queue->size() <= target_size || has_aborted()) {
2445 ret = false;
2446 } else {
2447 ret = _task_queue->pop_local(entry);
2448 }
2449 }
2450 }
2451 }
2452
2453 void G1CMTask::drain_global_stack(bool partially) {
2454 if (has_aborted()) return;
2455
2456 // We have a policy to drain the local queue before we attempt to
2457 // drain the global stack.
2458 assert(partially || _task_queue->size() == 0, "invariant");
2459
2460 // Decide what the target size is, depending on whether we're going to
2461 // drain it partially (so that other tasks can steal if they run out
2462 // of things to do) or totally (at the very end).
2463 // Notice that when draining the global mark stack partially, due to the raciness
2464 // of the mark stack size update we might in fact drop below the target. But,
2465 // this is not a problem.
2466 // In case of total draining, we simply process until the global mark stack is
2467 // totally empty, disregarding the size counter.
2468 if (partially) {
2469 size_t const target_size = _cm->partial_mark_stack_size_target();
2470 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2471 if (get_entries_from_global_stack()) {
2472 drain_local_queue(partially);
2473 }
2474 }
2475 } else {
2476 while (!has_aborted() && get_entries_from_global_stack()) {
2477 drain_local_queue(partially);
2478 }
2479 }
2480 }
2481
2482 // The SATB queue code has several assumptions on whether to call the par or
2483 // non-par versions of the methods. This is why some of the code is
2484 // replicated. We should really get rid of the single-threaded version
2485 // of the code to simplify things.
2486 void G1CMTask::drain_satb_buffers() {
2487 if (has_aborted()) return;
2488
2489 // We set this so that the regular clock knows that we're in the
2490 // middle of draining buffers and doesn't set the abort flag when it
2491 // notices that SATB buffers are available for draining. It'd be
2492 // very counterproductive if it did that. :-)
2493 _draining_satb_buffers = true;
2494
2495 G1CMSATBBufferClosure satb_cl(this, _g1h);
2496 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2497
2498 // This keeps claiming and applying the closure to completed buffers
2499 // until we run out of buffers or we need to abort.
2500 while (!has_aborted() &&
2501 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2502 regular_clock_call();
2503 }
2504
2505 _draining_satb_buffers = false;
2506
2507 assert(has_aborted() ||
2508 concurrent() ||
2509 satb_mq_set.completed_buffers_num() == 0, "invariant");
2510
2511 // again, this was a potentially expensive operation, decrease the
2512 // limits to get the regular clock call early
2513 decrease_limits();
2514 }
2515
2516 void G1CMTask::print_stats() {
2517 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2518 _worker_id, _calls);
2519 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2520 _elapsed_time_ms, _termination_time_ms);
2521 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2522 _step_times_ms.num(), _step_times_ms.avg(),
2523 _step_times_ms.sd());
2524 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms",
2525 _step_times_ms.maximum(), _step_times_ms.sum());
2526 }
2527
2528 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2529 return _task_queues->steal(worker_id, hash_seed, task_entry);
2530 }
2531
2532 /*****************************************************************************
2533
2534 The do_marking_step(time_target_ms, ...) method is the building
2535 block of the parallel marking framework. It can be called in parallel
2536 with other invocations of do_marking_step() on different tasks
2537 (but only one per task, obviously) and concurrently with the
2538 mutator threads, or during remark, hence it eliminates the need
2539 for two versions of the code. When called during remark, it will
2540 pick up from where the task left off during the concurrent marking
2541 phase. Interestingly, tasks are also claimable during evacuation
2542 pauses, since do_marking_step() ensures that it aborts before
2543 it needs to yield.
2544
2545 The data structures that it uses to do marking work are the
2546 following:
2547
2548 (1) Marking Bitmap. If there are gray objects that appear only
2549 on the bitmap (this happens either when dealing with an overflow
2550 or when the initial marking phase has simply marked the roots
2551 and didn't push them on the stack), then tasks claim heap
2552 regions whose bitmap they then scan to find gray objects. A
2553 global finger indicates where the end of the last claimed region
2554 is. A local finger indicates how far into the region a task has
2555 scanned. The two fingers are used to determine how to gray an
2556 object (i.e. whether simply marking it is OK, as it will be
2557 visited by a task in the future, or whether it also needs to be
2558 pushed on a stack).
2559
2560 (2) Local Queue. The local queue of the task which is accessed
2561 reasonably efficiently by the task. Other tasks can steal from
2562 it when they run out of work. Throughout the marking phase, a
2563 task attempts to keep its local queue short but not totally
2564 empty, so that entries are available for stealing by other
2565 tasks. Only when there is no more work does a task totally
2566 drain its local queue.
2567
2568 (3) Global Mark Stack. This handles local queue overflow. During
2569 marking only sets of entries are moved between it and the local
2570 queues, as access to it requires a mutex and more fine-grained
2571 interaction with it, which might cause contention. If it
2572 overflows, then the marking phase should restart and iterate
2573 over the bitmap to identify gray objects.
Throughout the marking
2574 phase, tasks attempt to keep the global mark stack at a small
2575 length but not totally empty, so that entries are available for
2576 popping by other tasks. Only when there is no more work do tasks
2577 totally drain the global mark stack.
2578
2579 (4) SATB Buffer Queue. This is where completed SATB buffers are
2580 made available. Buffers are regularly removed from this queue
2581 and scanned for roots, so that the queue doesn't get too
2582 long. During remark, all completed buffers are processed, as
2583 well as the filled-in parts of any uncompleted buffers.
2584
2585 The do_marking_step() method tries to abort when the time target
2586 has been reached. There are a few other cases when the
2587 do_marking_step() method also aborts:
2588
2589 (1) When the marking phase has been aborted (after a Full GC).
2590
2591 (2) When a global overflow (on the global stack) has been
2592 triggered. Before the task aborts, it will actually sync up with
2593 the other tasks to ensure that all the marking data structures
2594 (local queues, stacks, fingers etc.) are re-initialized so that
2595 when do_marking_step() completes, the marking phase can
2596 immediately restart.
2597
2598 (3) When enough completed SATB buffers are available. The
2599 do_marking_step() method only tries to drain SATB buffers right
2600 at the beginning. So, if enough buffers are available, the
2601 marking step aborts and the SATB buffers are processed at
2602 the beginning of the next invocation.
2603
2604 (4) To yield. When we have to yield, we abort and yield
2605 right at the end of do_marking_step(). This saves us from a lot
2606 of hassle as, by yielding, we might allow a Full GC. If this
2607 happens then objects will be compacted underneath our feet, the
2608 heap might shrink, etc. We save checking for this by just
2609 aborting and doing the yield right at the end.
2610
2611 From the above it follows that the do_marking_step() method should
2612 be called in a loop (or, otherwise, regularly) until it completes.
2613
2614 If a marking step completes without its has_aborted() flag being
2615 true, it means it has completed the current marking phase (and
2616 also all other marking tasks have done so and have all synced up).
2617
2618 A method called regular_clock_call() is invoked "regularly" (in
2619 sub-ms intervals) throughout marking. It is this clock method that
2620 checks all the abort conditions which were mentioned above and
2621 decides when the task should abort. A work-based scheme is used to
2622 trigger this clock method: when the number of object words the
2623 marking phase has scanned or the number of references the marking
2624 phase has visited reaches a given limit. Additional invocations of
2625 the clock method have been planted in a few other strategic places
2626 too. The initial reason for the clock method was to avoid calling
2627 vtime too regularly, as it is quite expensive. So, once it was in
2628 place, it was natural to piggy-back all the other conditions on it
2629 too and not constantly check them throughout the code.
2630
2631 If do_termination is true then do_marking_step will enter its
2632 termination protocol.
2633
2634 The value of is_serial must be true when do_marking_step is being
2635 called serially (i.e. by the VMThread) and do_marking_step should
2636 skip any synchronization in the termination and overflow code.
2637 Examples include the serial remark code and the serial reference
2638 processing closures.
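 As an illustration only (this mirrors the call sites that appear
 elsewhere in this file, e.g. in G1CMKeepAliveAndDrainClosure and
 G1CMRemarkTask; it adds nothing new), the retry loop around
 do_marking_step() looks like:

   do {
     task->do_marking_step(target_ms, do_termination, is_serial);
   } while (task->has_aborted() && !cm->has_overflown());

 i.e. a step is retried until it either completes or a global mark
 stack overflow forces marking to restart from the bitmap.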
2639
2640 The value of is_serial must be false when do_marking_step is
2641 being called by any of the worker threads in a work gang.
2642 Examples include the concurrent marking code (CMMarkingTask),
2643 the MT remark code, and the MT reference processing closures.
2644
2645 *****************************************************************************/
2646
2647 void G1CMTask::do_marking_step(double time_target_ms,
2648 bool do_termination,
2649 bool is_serial) {
2650 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2651 assert(concurrent() == _cm->concurrent(), "they should be the same");
2652
2653 G1Policy* g1_policy = _g1h->g1_policy();
2654 assert(_task_queues != NULL, "invariant");
2655 assert(_task_queue != NULL, "invariant");
2656 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
2657
2658 assert(!_claimed,
2659 "only one thread should claim this task at any one time");
2660
2661 // OK, this doesn't safeguard against all possible scenarios, as it is
2662 // possible for two threads to set the _claimed flag at the same
2663 // time. But it is only for debugging purposes anyway and it will
2664 // catch most problems.
2665 _claimed = true;
2666
2667 _start_time_ms = os::elapsedVTime() * 1000.0;
2668
2669 // If do_stealing is true then do_marking_step will attempt to
2670 // steal work from the other G1CMTasks. It only makes sense to
2671 // enable stealing when the termination protocol is enabled
2672 // and do_marking_step() is not being called serially.
2673 bool do_stealing = do_termination && !is_serial;
2674
2675 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2676 _time_target_ms = time_target_ms - diff_prediction_ms;
2677
2678 // set up the variables that are used in the work-based scheme to
2679 // call the regular clock method
2680 _words_scanned = 0;
2681 _refs_reached = 0;
2682 recalculate_limits();
2683
2684 // clear all flags
2685 clear_has_aborted();
2686 _has_timed_out = false;
2687 _draining_satb_buffers = false;
2688
2689 ++_calls;
2690
2691 // Set up the bitmap and oop closures. Anything that uses them is
2692 // eventually called from this method, so it is OK to allocate these
2693 // statically.
2694 G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
2695 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
2696 set_cm_oop_closure(&cm_oop_closure);
2697
2698 if (_cm->has_overflown()) {
2699 // This can happen if the mark stack overflows during a GC pause
2700 // and this task, after a yield point, restarts. We have to abort
2701 // as we need to get into the overflow protocol which happens
2702 // right at the end of this task.
2703 set_has_aborted();
2704 }
2705
2706 // First drain any available SATB buffers. After this, we will not
2707 // look at SATB buffers before the next invocation of this method.
2708 // If enough completed SATB buffers are queued up, the regular clock
2709 // will abort this task so that it restarts.
2710 drain_satb_buffers();
2711 // ...then partially drain the local queue and the global stack
2712 drain_local_queue(true);
2713 drain_global_stack(true);
2714
2715 do {
2716 if (!has_aborted() && _curr_region != NULL) {
2717 // This means that we're already holding on to a region.
2718 assert(_finger != NULL, "if region is not NULL, then the finger "
2719 "should not be NULL either");
2720
2721 // We might have restarted this task after an evacuation pause
2722 // which might have evacuated the region we're holding on to
2723 // underneath our feet.
Let's read its limit again to make sure 2724 // that we do not iterate over a region of the heap that 2725 // contains garbage (update_region_limit() will also move 2726 // _finger to the start of the region if it is found empty). 2727 update_region_limit(); 2728 // We will start from _finger not from the start of the region, 2729 // as we might be restarting this task after aborting half-way 2730 // through scanning this region. In this case, _finger points to 2731 // the address where we last found a marked object. If this is a 2732 // fresh region, _finger points to start(). 2733 MemRegion mr = MemRegion(_finger, _region_limit); 2734 2735 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2736 "humongous regions should go around loop once only"); 2737 2738 // Some special cases: 2739 // If the memory region is empty, we can just give up the region. 2740 // If the current region is humongous then we only need to check 2741 // the bitmap for the bit associated with the start of the object, 2742 // scan the object if it's live, and give up the region. 2743 // Otherwise, let's iterate over the bitmap of the part of the region 2744 // that is left. 2745 // If the iteration is successful, give up the region. 2746 if (mr.is_empty()) { 2747 giveup_current_region(); 2748 regular_clock_call(); 2749 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2750 if (_nextMarkBitMap->isMarked(mr.start())) { 2751 // The object is marked - apply the closure 2752 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 2753 bitmap_closure.do_bit(offset); 2754 } 2755 // Even if this task aborted while scanning the humongous object 2756 // we can (and should) give up the current region. 2757 giveup_current_region(); 2758 regular_clock_call(); 2759 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 2760 giveup_current_region(); 2761 regular_clock_call(); 2762 } else { 2763 assert(has_aborted(), "currently the only way to do so"); 2764 // The only way to abort the bitmap iteration is to return 2765 // false from the do_bit() method. However, inside the 2766 // do_bit() method we move the _finger to point to the 2767 // object currently being looked at. So, if we bail out, we 2768 // have definitely set _finger to something non-null. 2769 assert(_finger != NULL, "invariant"); 2770 2771 // Region iteration was actually aborted. So now _finger 2772 // points to the address of the object we last scanned. If we 2773 // leave it there, when we restart this task, we will rescan 2774 // the object. It is easy to avoid this. We move the finger by 2775 // enough to point to the next possible object header (the 2776 // bitmap knows by how much we need to move it as it knows its 2777 // granularity). 2778 assert(_finger < _region_limit, "invariant"); 2779 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 2780 // Check if bitmap iteration was aborted while scanning the last object 2781 if (new_finger >= _region_limit) { 2782 giveup_current_region(); 2783 } else { 2784 move_finger_to(new_finger); 2785 } 2786 } 2787 } 2788 // At this point we have either completed iterating over the 2789 // region we were holding on to, or we have aborted. 2790 2791 // We then partially drain the local queue and the global stack. 2792 // (Do we really need this?) 
2793 drain_local_queue(true);
2794 drain_global_stack(true);
2795
2796 // Read the note on the claim_region() method on why it might
2797 // return NULL with potentially more regions available for
2798 // claiming and why we have to check out_of_regions() to determine
2799 // whether we're done or not.
2800 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2801 // We are going to try to claim a new region. We should have
2802 // given up on the previous one.
2803 // Separated the asserts so that we know which one fires.
2804 assert(_curr_region == NULL, "invariant");
2805 assert(_finger == NULL, "invariant");
2806 assert(_region_limit == NULL, "invariant");
2807 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2808 if (claimed_region != NULL) {
2809 // Yes, we managed to claim one
2810 setup_for_region(claimed_region);
2811 assert(_curr_region == claimed_region, "invariant");
2812 }
2813 // It is important to call the regular clock here. It might take
2814 // a while to claim a region if, for example, we hit a large
2815 // block of empty regions. So we need to call the regular clock
2816 // method once round the loop to make sure it's called
2817 // frequently enough.
2818 regular_clock_call();
2819 }
2820
2821 if (!has_aborted() && _curr_region == NULL) {
2822 assert(_cm->out_of_regions(),
2823 "at this point we should be out of regions");
2824 }
2825 } while (_curr_region != NULL && !has_aborted());
2826
2827 if (!has_aborted()) {
2828 // We cannot check whether the global stack is empty, since other
2829 // tasks might be pushing objects to it concurrently.
2830 assert(_cm->out_of_regions(),
2831 "at this point we should be out of regions");
2832 // Try to reduce the number of available SATB buffers so that
2833 // remark has less work to do.
2834 drain_satb_buffers();
2835 }
2836
2837 // Since we've done everything else, we can now totally drain the
2838 // local queue and global stack.
2839 drain_local_queue(false);
2840 drain_global_stack(false);
2841
2842 // Attempt at work stealing from other tasks' queues.
2843 if (do_stealing && !has_aborted()) {
2844 // We have not aborted. This means that we have finished all that
2845 // we could. Let's try to do some stealing...
2846
2847 // We cannot check whether the global stack is empty, since other
2848 // tasks might be pushing objects to it concurrently.
2849 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2850 "only way to reach here");
2851 while (!has_aborted()) {
2852 G1TaskQueueEntry entry;
2853 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2854 scan_task_entry(entry);
2855
2856 // And since we're towards the end, let's totally drain the
2857 // local queue and global stack.
2858 drain_local_queue(false);
2859 drain_global_stack(false);
2860 } else {
2861 break;
2862 }
2863 }
2864 }
2865
2866 // We still haven't aborted. Now, let's try to get into the
2867 // termination protocol.
2868 if (do_termination && !has_aborted()) {
2869 // We cannot check whether the global stack is empty, since other
2870 // tasks might be concurrently pushing objects on it.
2871 // Separated the asserts so that we know which one fires.
2872 assert(_cm->out_of_regions(), "only way to reach here");
2873 assert(_task_queue->size() == 0, "only way to reach here");
2874 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2875
2876 // The G1CMTask class also extends the TerminatorTerminator class,
2877 // hence its should_exit_termination() method will also decide
2878 // whether to exit the termination protocol or not.
2879 bool finished = (is_serial ||
2880 _cm->terminator()->offer_termination(this));
2881 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2882 _termination_time_ms +=
2883 termination_end_time_ms - _termination_start_time_ms;
2884
2885 if (finished) {
2886 // We're all done.
2887
2888 if (_worker_id == 0) {
2889 // let's allow task 0 to do this
2890 if (concurrent()) {
2891 assert(_cm->concurrent_marking_in_progress(), "invariant");
2892 // we need to set this to false before the next
2893 // safepoint. This way we ensure that the marking phase
2894 // doesn't observe any more heap expansions.
2895 _cm->clear_concurrent_marking_in_progress();
2896 }
2897 }
2898
2899 // We can now guarantee that the global stack is empty, since
2900 // all other tasks have finished. We separated the guarantees so
2901 // that, if a condition is false, we can immediately find out
2902 // which one.
2903 guarantee(_cm->out_of_regions(), "only way to reach here");
2904 guarantee(_cm->mark_stack_empty(), "only way to reach here");
2905 guarantee(_task_queue->size() == 0, "only way to reach here");
2906 guarantee(!_cm->has_overflown(), "only way to reach here");
2907 } else {
2908 // Apparently there's more work to do. Let's abort this task. The
2909 // caller will then restart it and we can hopefully find more things to do.
2910 set_has_aborted();
2911 }
2912 }
2913
2914 // Mainly for debugging purposes to make sure that a pointer to the
2915 // closure which was statically allocated in this frame doesn't
2916 // escape it by accident.
2917 set_cm_oop_closure(NULL);
2918 double end_time_ms = os::elapsedVTime() * 1000.0;
2919 double elapsed_time_ms = end_time_ms - _start_time_ms;
2920 // Update the step history.
2921 _step_times_ms.add(elapsed_time_ms);
2922
2923 if (has_aborted()) {
2924 // The task was aborted for some reason.
2925 if (_has_timed_out) {
2926 double diff_ms = elapsed_time_ms - _time_target_ms;
2927 // Keep statistics of how well we did with respect to hitting
2928 // our target only if we actually timed out (if we aborted for
2929 // other reasons, then the results might get skewed).
2930 _marking_step_diffs_ms.add(diff_ms);
2931 }
2932
2933 if (_cm->has_overflown()) {
2934 // This is the interesting one. We aborted because a global
2935 // overflow was raised. This means we have to restart the
2936 // marking phase and start iterating over regions. However, in
2937 // order to do this we have to make sure that all tasks stop
2938 // what they are doing and re-initialize in a safe manner. We
2939 // will achieve this with the use of two barrier sync points.
2940
2941 if (!is_serial) {
2942 // We only need to enter the sync barrier if being called
2943 // from a parallel context
2944 _cm->enter_first_sync_barrier(_worker_id);
2945
2946 // When we exit this sync barrier we know that all tasks have
2947 // stopped doing marking work. So, it's now safe to
2948 // re-initialize our data structures. At the end of this method,
2949 // task 0 will clear the global data structures.
2950 }
2951
2952 // We clear the local state of this task...

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }

  _claimed = false;
}

G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _objArray_processor(this),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
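
// A hedged illustration (comment only; "OLD" and used_bytes are assumed
// example values) of how these macros compose. Adjacent string literals
// are concatenated at compile time, so a call such as
//
//   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
//                           G1PPRL_TYPE_FORMAT
//                           G1PPRL_BYTE_FORMAT,
//                           "OLD", used_bytes);
//
// collapses into a single printf-style format string, roughly
//
//   log_trace(gc, liveness)("###" "   %-4s" "  " SIZE_FORMAT_W(9),
//                           "OLD", used_bytes);
//
// which is why each macro carries its own leading white space: the
// pieces can be stacked in any order without extra separators.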

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(const char* phase_name)
  : _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "(bytes)");
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // add static memory usages to remembered set sizes
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          perc(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_prev_live_bytes),
                          perc(_total_prev_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_next_live_bytes),
                          perc(_total_next_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_strong_code_roots_bytes));
}
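
// A hedged sketch (assumed to mirror the private helpers declared for
// this closure in g1ConcurrentMark.hpp) of the conversions used by the
// summary line above. perc() has to guard against a zero total, since
// an empty heap would otherwise divide by zero:
//
//   static double perc(size_t val, size_t total) {
//     if (total == 0) {
//       return 0.0;
//     }
//     return 100.0 * ((double) val / (double) total);
//   }
//
//   static double bytes_to_mb(size_t val) {
//     return (double) val / (double) M;
//   }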