1 /*
   2  * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorState.hpp"
  32 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  33 #include "gc/g1/g1HeapVerifier.hpp"
  34 #include "gc/g1/g1OopClosures.inline.hpp"
  35 #include "gc/g1/g1CardLiveData.inline.hpp"
  36 #include "gc/g1/g1Policy.hpp"
  37 #include "gc/g1/g1StringDedup.hpp"
  38 #include "gc/g1/heapRegion.inline.hpp"
  39 #include "gc/g1/heapRegionRemSet.hpp"
  40 #include "gc/g1/heapRegionSet.inline.hpp"
  41 #include "gc/g1/suspendibleThreadSet.hpp"
  42 #include "gc/shared/gcId.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.inline.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "logging/log.hpp"
  52 #include "memory/allocation.hpp"
  53 #include "memory/resourceArea.hpp"
  54 #include "oops/oop.inline.hpp"
  55 #include "runtime/atomic.hpp"
  56 #include "runtime/handles.inline.hpp"
  57 #include "runtime/java.hpp"
  58 #include "runtime/prefetch.inline.hpp"
  59 #include "services/memTracker.hpp"
  60 #include "utilities/growableArray.hpp"
  61 
  62 // Concurrent marking bit map wrapper
  63 
  64 G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  65   _bm(),
  66   _shifter(shifter) {
  67   _bmStartWord = 0;
  68   _bmWordSize = 0;
  69 }
  70 
  71 HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  72                                                  const HeapWord* limit) const {
  73   // First we must round addr *up* to a possible object boundary.
  74   addr = align_up(addr, HeapWordSize << _shifter);
  75   size_t addrOffset = heapWordToOffset(addr);
  76   assert(limit != NULL, "limit must not be NULL");
  77   size_t limitOffset = heapWordToOffset(limit);
  78   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  79   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  80   assert(nextAddr >= addr, "get_next_one postcondition");
  81   assert(nextAddr == limit || isMarked(nextAddr),
  82          "get_next_one postcondition");
  83   return nextAddr;
  84 }
  85 
  86 #ifndef PRODUCT
  87 bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  88   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  89   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  90          "size inconsistency");
  91   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  92          _bmWordSize  == heap_rs.word_size();
  93 }
  94 #endif
  95 
  96 void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  97   _bm.print_on_error(st, prefix);
  98 }
  99 
 100 size_t G1CMBitMap::compute_size(size_t heap_size) {
 101   return ReservedSpace::allocation_align_up(heap_size / mark_distance());
 102 }
 103 
 104 size_t G1CMBitMap::mark_distance() {
 105   return MinObjAlignmentInBytes * BitsPerByte;
 106 }
 107 
 108 void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 109   _bmStartWord = heap.start();
 110   _bmWordSize = heap.word_size();
 111 
 112   _bm = BitMapView((BitMap::bm_word_t*) storage->reserved().start(), _bmWordSize >> _shifter);
 113 
 114   storage->set_mapping_changed_listener(&_listener);
 115 }
 116 
 117 void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 118   if (zero_filled) {
 119     return;
 120   }
 121   // We need to clear the bitmap on commit, removing any existing information.
 122   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 123   _bm->clear_range(mr);
 124 }
 125 
 126 void G1CMBitMap::clear_range(MemRegion mr) {
 127   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 128   assert(!mr.is_empty(), "unexpected empty region");
 129   // convert address range into offset range
 130   _bm.at_put_range(heapWordToOffset(mr.start()),
 131                    heapWordToOffset(mr.end()), false);
 132 }
 133 
 134 G1CMMarkStack::G1CMMarkStack() :
 135   _max_chunk_capacity(0),
 136   _base(NULL),
 137   _chunk_capacity(0) {
 138   set_empty();
 139 }
 140 
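     // Reallocate the backing memory to hold new_capacity chunks, releasing any
     // previous mapping. May only be called while the stack is empty. Returns
     // false if the new backing memory could not be reserved.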
 141 bool G1CMMarkStack::resize(size_t new_capacity) {
 142   assert(is_empty(), "Only resize when stack is empty.");
 143   assert(new_capacity <= _max_chunk_capacity,
 144          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
 145 
 146   TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::allocate_or_null(new_capacity);
 147 
 148   if (new_base == NULL) {
 149     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 150     return false;
 151   }
 152   // Release old mapping.
 153   if (_base != NULL) {
 154     MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
 155   }
 156 
 157   _base = new_base;
 158   _chunk_capacity = new_capacity;
 159   set_empty();
 160 
 161   return true;
 162 }
 163 
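     // Alignment, in number of task queue entries, to which mark stack capacities
     // are rounded so that the backing memory is a whole multiple of both the
     // chunk size and the VM allocation granularity.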
 164 size_t G1CMMarkStack::capacity_alignment() {
 165   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 166 }
 167 
 168 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 169   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 170 
 171   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 172 
 173   _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 174   size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 175 
 176   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 177             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 178             _max_chunk_capacity,
 179             initial_chunk_capacity);
 180 
 181   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 182                 initial_chunk_capacity, _max_chunk_capacity);
 183 
 184   return resize(initial_chunk_capacity);
 185 }
 186 
 187 void G1CMMarkStack::expand() {
 188   if (_chunk_capacity == _max_chunk_capacity) {
 189     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 190     return;
 191   }
 192   size_t old_capacity = _chunk_capacity;
 193   // Double capacity if possible
 194   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 195 
 196   if (resize(new_capacity)) {
 197     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 198                   old_capacity, new_capacity);
 199   } else {
 200     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 201                     old_capacity, new_capacity);
 202   }
 203 }
 204 
 205 G1CMMarkStack::~G1CMMarkStack() {
 206   if (_base != NULL) {
 207     MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
 208   }
 209 }
 210 
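     // Chunk list management. The unsynchronized add_chunk_to_list() and
     // remove_chunk_from_list() expect their callers to hold the corresponding
     // list lock.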
 211 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 212   elem->next = *list;
 213   *list = elem;
 214 }
 215 
 216 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 217   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 218   add_chunk_to_list(&_chunk_list, elem);
 219   _chunks_in_chunk_list++;
 220 }
 221 
 222 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 223   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 224   add_chunk_to_list(&_free_list, elem);
 225 }
 226 
 227 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 228   TaskQueueEntryChunk* result = *list;
 229   if (result != NULL) {
 230     *list = (*list)->next;
 231   }
 232   return result;
 233 }
 234 
 235 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 236   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 237   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 238   if (result != NULL) {
 239     _chunks_in_chunk_list--;
 240   }
 241   return result;
 242 }
 243 
 244 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 245   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 246   return remove_chunk_from_list(&_free_list);
 247 }
 248 
 249 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
 250   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 251   // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 252   // wraparound of _hwm.
 253   if (_hwm >= _chunk_capacity) {
 254     return NULL;
 255   }
 256 
 257   size_t cur_idx = Atomic::add(1, &_hwm) - 1;
 258   if (cur_idx >= _chunk_capacity) {
 259     return NULL;
 260   }
 261 
 262   TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
 263   result->next = NULL;
 264   return result;
 265 }
 266 
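     // Copy EntriesPerChunk entries from ptr_arr into a chunk taken from the free
     // list (or newly allocated from the backing memory) and publish it on the
     // chunk list. Returns false if the mark stack is full.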
 267 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 268   // Get a new chunk.
 269   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 270 
 271   if (new_chunk == NULL) {
 272     // Did not get a chunk from the free list. Allocate from backing memory.
 273     new_chunk = allocate_new_chunk();
 274 
 275     if (new_chunk == NULL) {
 276       return false;
 277     }
 278   }
 279 
 280   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 281 
 282   add_chunk_to_chunk_list(new_chunk);
 283 
 284   return true;
 285 }
 286 
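     // Copy the EntriesPerChunk entries of the most recently added chunk into
     // ptr_arr and recycle the chunk onto the free list. Returns false if the
     // mark stack is empty.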
 287 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 288   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 289 
 290   if (cur == NULL) {
 291     return false;
 292   }
 293 
 294   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 295 
 296   add_chunk_to_free_list(cur);
 297   return true;
 298 }
 299 
 300 void G1CMMarkStack::set_empty() {
 301   _chunks_in_chunk_list = 0;
 302   _hwm = 0;
 303   _chunk_list = NULL;
 304   _free_list = NULL;
 305 }
 306 
 307 G1CMRootRegions::G1CMRootRegions() :
 308   _cm(NULL), _scan_in_progress(false),
 309   _should_abort(false), _claimed_survivor_index(0) { }
 310 
 311 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 312   _survivors = survivors;
 313   _cm = cm;
 314 }
 315 
 316 void G1CMRootRegions::prepare_for_scan() {
 317   assert(!scan_in_progress(), "pre-condition");
 318 
 319   // Currently, only survivors can be root regions.
 320   _claimed_survivor_index = 0;
 321   _scan_in_progress = _survivors->regions()->is_nonempty();
 322   _should_abort = false;
 323 }
 324 
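     // Claim the next unscanned root region, returning NULL when all regions have
     // been claimed or the scan should be aborted.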
 325 HeapRegion* G1CMRootRegions::claim_next() {
 326   if (_should_abort) {
 327     // If someone has set the should_abort flag, we return NULL to
 328     // force the caller to bail out of their loop.
 329     return NULL;
 330   }
 331 
 332   // Currently, only survivors can be root regions.
 333   const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
 334 
 335   int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
 336   if (claimed_index < survivor_regions->length()) {
 337     return survivor_regions->at(claimed_index);
 338   }
 339   return NULL;
 340 }
 341 
 342 uint G1CMRootRegions::num_root_regions() const {
 343   return (uint)_survivors->regions()->length();
 344 }
 345 
 346 void G1CMRootRegions::notify_scan_done() {
 347   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 348   _scan_in_progress = false;
 349   RootRegionScan_lock->notify_all();
 350 }
 351 
 352 void G1CMRootRegions::cancel_scan() {
 353   notify_scan_done();
 354 }
 355 
 356 void G1CMRootRegions::scan_finished() {
 357   assert(scan_in_progress(), "pre-condition");
 358 
 359   // Currently, only survivors can be root regions.
 360   if (!_should_abort) {
 361     assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
 362     assert((uint)_claimed_survivor_index >= _survivors->length(),
 363            "we should have claimed all survivors, claimed index = %u, length = %u",
 364            (uint)_claimed_survivor_index, _survivors->length());
 365   }
 366 
 367   notify_scan_done();
 368 }
 369 
 370 bool G1CMRootRegions::wait_until_scan_finished() {
 371   if (!scan_in_progress()) return false;
 372 
 373   {
 374     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 375     while (scan_in_progress()) {
 376       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 377     }
 378   }
 379   return true;
 380 }
 381 
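     // Derive a default number of concurrent marking threads from the number of
     // parallel GC threads: roughly one quarter of them, but at least one.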
 382 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 383   return MAX2((n_par_threads + 2) / 4, 1U);
 384 }
 385 
 386 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 387   _g1h(g1h),
 388   _markBitMap1(),
 389   _markBitMap2(),
 390   _parallel_marking_threads(0),
 391   _max_parallel_marking_threads(0),
 392   _sleep_factor(0.0),
 393   _marking_task_overhead(1.0),
 394   _cleanup_list("Cleanup List"),
 395 
 396   _prevMarkBitMap(&_markBitMap1),
 397   _nextMarkBitMap(&_markBitMap2),
 398 
 399   _global_mark_stack(),
 400   // _finger set in set_non_marking_state
 401 
 402   _max_worker_id(ParallelGCThreads),
 403   // _active_tasks set in set_non_marking_state
 404   // _tasks set inside the constructor
 405   _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
 406   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 407 
 408   _has_overflown(false),
 409   _concurrent(false),
 410   _has_aborted(false),
 411   _restart_for_overflow(false),
 412   _concurrent_marking_in_progress(false),
 413   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 414   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 415 
 416   // _verbose_level set below
 417 
 418   _init_times(),
 419   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 420   _cleanup_times(),
 421   _total_counting_time(0.0),
 422   _total_rs_scrub_time(0.0),
 423 
 424   _parallel_workers(NULL),
 425 
 426   _completed_initialization(false) {
 427 
 428   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 429   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 430 
 431   // Create & start a ConcurrentMark thread.
 432   _cmThread = new ConcurrentMarkThread(this);
 433   assert(cmThread() != NULL, "CM Thread should have been created");
 434   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 435   if (_cmThread->osthread() == NULL) {
 436       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 437   }
 438 
 439   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 440   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 441   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 442 
 443   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 444   satb_qs.set_buffer_size(G1SATBBufferSize);
 445 
 446   _root_regions.init(_g1h->survivor(), this);
 447 
 448   if (ConcGCThreads > ParallelGCThreads) {
 449     log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
 450                     ConcGCThreads, ParallelGCThreads);
 451     return;
 452   }
 453   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 454     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 455     // if both are set
 456     _sleep_factor             = 0.0;
 457     _marking_task_overhead    = 1.0;
 458   } else if (G1MarkingOverheadPercent > 0) {
 459     // We will calculate the number of parallel marking threads based
 460     // on a target overhead with respect to the soft real-time goal
 461     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 462     double overall_cm_overhead =
 463       (double) MaxGCPauseMillis * marking_overhead /
 464       (double) GCPauseIntervalMillis;
 465     double cpu_ratio = 1.0 / os::initial_active_processor_count();
 466     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 467     double marking_task_overhead =
 468       overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
 469     double sleep_factor =
 470                        (1.0 - marking_task_overhead) / marking_task_overhead;
 471 
 472     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 473     _sleep_factor             = sleep_factor;
 474     _marking_task_overhead    = marking_task_overhead;
 475   } else {
 476     // Calculate the number of parallel marking threads by scaling
 477     // the number of parallel GC threads.
 478     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 479     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 480     _sleep_factor             = 0.0;
 481     _marking_task_overhead    = 1.0;
 482   }
 483 
 484   assert(ConcGCThreads > 0, "Should have been set");
 485   log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
 486   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 487   _parallel_marking_threads = ConcGCThreads;
 488   _max_parallel_marking_threads = _parallel_marking_threads;
 489 
 490   _parallel_workers = new WorkGang("G1 Marker",
 491        _max_parallel_marking_threads, false, true);
 492   if (_parallel_workers == NULL) {
 493     vm_exit_during_initialization("Failed necessary allocation.");
 494   } else {
 495     _parallel_workers->initialize_workers();
 496   }
 497 
 498   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 499     size_t mark_stack_size =
 500       MIN2(MarkStackSizeMax,
 501           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 502     // Verify that the calculated value for MarkStackSize is in range.
 503     // It would be nice to use the private utility routine from Arguments.
 504     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 505       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 506                       "must be between 1 and " SIZE_FORMAT,
 507                       mark_stack_size, MarkStackSizeMax);
 508       return;
 509     }
 510     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 511   } else {
 512     // Verify MarkStackSize is in range.
 513     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 514       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 515         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 516           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 517                           "must be between 1 and " SIZE_FORMAT,
 518                           MarkStackSize, MarkStackSizeMax);
 519           return;
 520         }
 521       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 522         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 523           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 524                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 525                           MarkStackSize, MarkStackSizeMax);
 526           return;
 527         }
 528       }
 529     }
 530   }
 531 
 532   if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
 533     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 534   }
 535 
 536   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
 537   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 538 
 539   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 540   _active_tasks = _max_worker_id;
 541 
 542   for (uint i = 0; i < _max_worker_id; ++i) {
 543     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 544     task_queue->initialize();
 545     _task_queues->register_queue(i, task_queue);
 546 
 547     _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);
 548 
 549     _accum_task_vtime[i] = 0.0;
 550   }
 551 
 552   // so that the call below can read a sensible value
 553   _heap_start = g1h->reserved_region().start();
 554   set_non_marking_state();
 555   _completed_initialization = true;
 556 }
 557 
 558 void G1ConcurrentMark::reset() {
 559   // Starting values for these two. This should be called in a STW
 560   // phase.
 561   MemRegion reserved = _g1h->g1_reserved();
 562   _heap_start = reserved.start();
 563   _heap_end   = reserved.end();
 564 
 565   // Separated the asserts so that we know which one fires.
 566   assert(_heap_start != NULL, "heap bounds should look ok");
 567   assert(_heap_end != NULL, "heap bounds should look ok");
 568   assert(_heap_start < _heap_end, "heap bounds should look ok");
 569 
 570   // Reset all the marking data structures and any necessary flags
 571   reset_marking_state();
 572 
 573   // We do reset all of them, since different phases will use
 574   // different numbers of active threads. So, it's easiest to have all
 575   // of them ready.
 576   for (uint i = 0; i < _max_worker_id; ++i) {
 577     _tasks[i]->reset(_nextMarkBitMap);
 578   }
 579 
 580   // we need this to make sure that the flag is on during the evac
 581   // pause with initial mark piggy-backed
 582   set_concurrent_marking_in_progress();
 583 }
 584 
 585 
 586 void G1ConcurrentMark::reset_marking_state() {
 587   _global_mark_stack.set_empty();
 588 
 589   // Expand the marking stack, if we have to and if we can.
 590   if (has_overflown()) {
 591     _global_mark_stack.expand();
 592   }
 593 
 594   clear_has_overflown();
 595   _finger = _heap_start;
 596 
 597   for (uint i = 0; i < _max_worker_id; ++i) {
 598     G1CMTaskQueue* queue = _task_queues->queue(i);
 599     queue->set_empty();
 600   }
 601 }
 602 
 603 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 604   assert(active_tasks <= _max_worker_id, "we should not have more");
 605 
 606   _active_tasks = active_tasks;
 607   // Need to update the three data structures below according to the
 608   // number of active threads for this phase.
 609   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 610   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 611   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 612 }
 613 
 614 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 615   set_concurrency(active_tasks);
 616 
 617   _concurrent = concurrent;
 618   // We propagate this to all tasks, not just the active ones.
 619   for (uint i = 0; i < _max_worker_id; ++i)
 620     _tasks[i]->set_concurrent(concurrent);
 621 
 622   if (concurrent) {
 623     set_concurrent_marking_in_progress();
 624   } else {
 625     // We currently assume that the concurrent flag has been set to
 626     // false before we start remark. At this point we should also be
 627     // in a STW phase.
 628     assert(!concurrent_marking_in_progress(), "invariant");
 629     assert(out_of_regions(),
 630            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 631            p2i(_finger), p2i(_heap_end));
 632   }
 633 }
 634 
 635 void G1ConcurrentMark::set_non_marking_state() {
 636   // We set the global marking state to some default values when we're
 637   // not doing marking.
 638   reset_marking_state();
 639   _active_tasks = 0;
 640   clear_concurrent_marking_in_progress();
 641 }
 642 
 643 G1ConcurrentMark::~G1ConcurrentMark() {
 644   // The G1ConcurrentMark instance is never freed.
 645   ShouldNotReachHere();
 646 }
 647 
 648 class G1ClearBitMapTask : public AbstractGangTask {
 649 public:
 650   static size_t chunk_size() { return M; }
 651 
 652 private:
 653   // Heap region closure used for clearing the given mark bitmap.
 654   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 655   private:
 656     G1CMBitMap* _bitmap;
 657     G1ConcurrentMark* _cm;
 658   public:
 659     G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) {
 660     }
 661 
 662     virtual bool doHeapRegion(HeapRegion* r) {
 663       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 664 
 665       HeapWord* cur = r->bottom();
 666       HeapWord* const end = r->end();
 667 
 668       while (cur < end) {
 669         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 670         _bitmap->clear_range(mr);
 671 
 672         cur += chunk_size_in_words;
 673 
 674         // Abort iteration if after yielding the marking has been aborted.
 675         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 676           return true;
 677         }
 678         // Repeat the asserts from before the start of the closure. We will do them
 679         // as asserts here to minimize their overhead on the product. However, we
 680         // will have them as guarantees at the beginning / end of the bitmap
 681         // clearing to get some checking in the product.
 682         assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
 683         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 684       }
 685       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 686 
 687       return false;
 688     }
 689   };
 690 
 691   G1ClearBitmapHRClosure _cl;
 692   HeapRegionClaimer _hr_claimer;
 693   bool _suspendible; // If the task is suspendible, workers must join the STS.
 694 
 695 public:
 696   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 697     AbstractGangTask("G1 Clear Bitmap"),
 698     _cl(bitmap, suspendible ? cm : NULL),
 699     _hr_claimer(n_workers),
 700     _suspendible(suspendible)
 701   { }
 702 
 703   void work(uint worker_id) {
 704     SuspendibleThreadSetJoiner sts_join(_suspendible);
 705     G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
 706   }
 707 
 708   bool is_complete() {
 709     return _cl.complete();
 710   }
 711 };
 712 
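     // Clear the given bitmap in parallel, dividing the work into chunks of
     // G1ClearBitMapTask::chunk_size() bytes. If may_yield is true, the workers
     // periodically yield to the suspendible thread set and abort early if marking
     // has been aborted in the meantime.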
 713 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 714   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 715 
 716   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 717   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 718 
 719   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 720 
 721   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 722 
 723   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 724   workers->run_task(&cl, num_workers);
 725   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 726 }
 727 
 728 void G1ConcurrentMark::cleanup_for_next_mark() {
 729   // Make sure that the concurrent mark thread looks to still be in
 730   // the current cycle.
 731   guarantee(cmThread()->during_cycle(), "invariant");
 732 
 733   // We are finishing up the current cycle by clearing the next
 734   // marking bitmap and getting it ready for the next cycle. During
 735   // this time no other cycle can start. So, let's make sure that this
 736   // is the case.
 737   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 738 
 739   clear_bitmap(_nextMarkBitMap, _parallel_workers, true);
 740 
 741   // Clear the live count data. If the marking has been aborted, the abort()
 742   // call already did that.
 743   if (!has_aborted()) {
 744     clear_live_data(_parallel_workers);
 745     DEBUG_ONLY(verify_live_data_clear());
 746   }
 747 
 748   // Repeat the asserts from above.
 749   guarantee(cmThread()->during_cycle(), "invariant");
 750   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 751 }
 752 
 753 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 754   assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
 755   clear_bitmap((G1CMBitMap*)_prevMarkBitMap, workers, false);
 756 }
 757 
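     // Closure that checks whether any bit is set in a region's portion of the
     // given bitmap; used to verify that the next mark bitmap is completely clear.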
 758 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 759   G1CMBitMap* _bitmap;
 760   bool _error;
 761  public:
 762   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 763   }
 764 
 765   virtual bool doHeapRegion(HeapRegion* r) {
 766     // This closure can be called concurrently to the mutator, so we must make sure
 767     // that the result of the getNextMarkedWordAddress() call is compared to the
 768     // value passed to it as limit to detect any found bits.
 769     // end never changes in G1.
 770     HeapWord* end = r->end();
 771     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 772   }
 773 };
 774 
 775 bool G1ConcurrentMark::nextMarkBitmapIsClear() {
 776   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 777   _g1h->heap_region_iterate(&cl);
 778   return cl.complete();
 779 }
 780 
 781 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 782 public:
 783   bool doHeapRegion(HeapRegion* r) {
 784     r->note_start_of_marking();
 785     return false;
 786   }
 787 };
 788 
 789 void G1ConcurrentMark::checkpointRootsInitialPre() {
 790   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 791   G1Policy* g1p = g1h->g1_policy();
 792 
 793   _has_aborted = false;
 794 
 795   // Initialize marking structures. This has to be done in a STW phase.
 796   reset();
 797 
 798   // For each region note start of marking.
 799   NoteStartOfMarkHRClosure startcl;
 800   g1h->heap_region_iterate(&startcl);
 801 }
 802 
 803 
 804 void G1ConcurrentMark::checkpointRootsInitialPost() {
 805   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 806 
 807   // Start Concurrent Marking weak-reference discovery.
 808   ReferenceProcessor* rp = g1h->ref_processor_cm();
 809   // enable ("weak") refs discovery
 810   rp->enable_discovery();
 811   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 812 
 813   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 814   // This is the start of the marking cycle; we expect all
 815   // threads to have SATB queues with active set to false.
 816   satb_mq_set.set_active_all_threads(true, /* new active value */
 817                                      false /* expected_active */);
 818 
 819   _root_regions.prepare_for_scan();
 820 
 821   // update_g1_committed() will be called at the end of an evac pause
 822   // when marking is on. So, it's also called at the end of the
 823   // initial-mark pause to update the heap end, if the heap expands
 824   // during it. No need to call it here.
 825 }
 826 
 827 /*
 828  * Notice that in the next two methods, we actually leave the STS
 829  * during the barrier sync and join it immediately afterwards. If we
 830  * do not do this, the following deadlock can occur: one thread could
 831  * be in the barrier sync code, waiting for the other thread to also
 832  * sync up, whereas another one could be trying to yield, while also
 833  * waiting for the other threads to sync up too.
 834  *
 835  * Note, however, that this code is also used during remark and in
 836  * this case we should not attempt to leave / enter the STS, otherwise
 837  * we'll either hit an assert (debug / fastdebug) or deadlock
 838  * (product). So we should only leave / enter the STS if we are
 839  * operating concurrently.
 840  *
 841  * Because the thread that does the sync barrier has left the STS, it
 842  * is possible for a Full GC or an evacuation pause to occur while it
 843  * is waiting there. This is actually safe, since entering the sync
 844  * barrier is one of the last things do_marking_step() does, and it
 845  * doesn't manipulate any data structures afterwards.
 846  */
 847 
 848 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 849   bool barrier_aborted;
 850   {
 851     SuspendibleThreadSetLeaver sts_leave(concurrent());
 852     barrier_aborted = !_first_overflow_barrier_sync.enter();
 853   }
 854 
 855   // at this point everyone should have synced up and not be doing any
 856   // more work
 857 
 858   if (barrier_aborted) {
 859     // If the barrier aborted we ignore the overflow condition and
 860     // just abort the whole marking phase as quickly as possible.
 861     return;
 862   }
 863 
 864   // If we're executing the concurrent phase of marking, reset the marking
 865   // state; otherwise the marking state is reset after reference processing,
 866   // during the remark pause.
 867   // If we reset here as a result of an overflow during the remark we will
 868   // see assertion failures from any subsequent set_concurrency_and_phase()
 869   // calls.
 870   if (concurrent()) {
 871       // let the task associated with worker 0 do this
 872     if (worker_id == 0) {
 873       // task 0 is responsible for clearing the global data structures
 874       // We should be here because of an overflow. During STW we should
 875       // not clear the overflow flag since we rely on it being true when
 876       // we exit this method to abort the pause and restart concurrent
 877       // marking.
 878       reset_marking_state();
 879 
 880       log_info(gc, marking)("Concurrent Mark reset for overflow");
 881     }
 882   }
 883 
 884   // after this, each task should reset its own data structures and
 885   // then go into the second barrier
 886 }
 887 
 888 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 889   SuspendibleThreadSetLeaver sts_leave(concurrent());
 890   _second_overflow_barrier_sync.enter();
 891 
 892   // at this point everything should be re-initialized and ready to go
 893 }
 894 
 895 class G1CMConcurrentMarkingTask: public AbstractGangTask {
 896 private:
 897   G1ConcurrentMark*     _cm;
 898   ConcurrentMarkThread* _cmt;
 899 
 900 public:
 901   void work(uint worker_id) {
 902     assert(Thread::current()->is_ConcurrentGC_thread(),
 903            "this should only be done by a conc GC thread");
 904     ResourceMark rm;
 905 
 906     double start_vtime = os::elapsedVTime();
 907 
 908     {
 909       SuspendibleThreadSetJoiner sts_join;
 910 
 911       assert(worker_id < _cm->active_tasks(), "invariant");
 912       G1CMTask* the_task = _cm->task(worker_id);
 913       the_task->record_start_time();
 914       if (!_cm->has_aborted()) {
 915         do {
 916           double start_vtime_sec = os::elapsedVTime();
 917           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 918 
 919           the_task->do_marking_step(mark_step_duration_ms,
 920                                     true  /* do_termination */,
 921                                     false /* is_serial*/);
 922 
 923           double end_vtime_sec = os::elapsedVTime();
 924           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 925           _cm->do_yield_check();
 926 
 927           jlong sleep_time_ms;
 928           if (!_cm->has_aborted() && the_task->has_aborted()) {
 929             sleep_time_ms =
 930               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 931             {
 932               SuspendibleThreadSetLeaver sts_leave;
 933               os::sleep(Thread::current(), sleep_time_ms, false);
 934             }
 935           }
 936         } while (!_cm->has_aborted() && the_task->has_aborted());
 937       }
 938       the_task->record_end_time();
 939       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 940     }
 941 
 942     double end_vtime = os::elapsedVTime();
 943     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 944   }
 945 
 946   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
 947                             ConcurrentMarkThread* cmt) :
 948       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 949 
 950   ~G1CMConcurrentMarkingTask() { }
 951 };
 952 
 953 // Calculates the number of active workers for a concurrent
 954 // phase.
 955 uint G1ConcurrentMark::calc_parallel_marking_threads() {
 956   uint n_conc_workers = 0;
 957   if (!UseDynamicNumberOfGCThreads ||
 958       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 959        !ForceDynamicNumberOfGCThreads)) {
 960     n_conc_workers = max_parallel_marking_threads();
 961   } else {
 962     n_conc_workers =
 963       AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
 964                                                       1, /* Minimum workers */
 965                                                       parallel_marking_threads(),
 966                                                       Threads::number_of_non_daemon_threads());
 967     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 968     // that scaling has already gone into "_max_parallel_marking_threads".
 969   }
 970   assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
 971          "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
 972          max_parallel_marking_threads(), n_conc_workers);
 973   return n_conc_workers;
 974 }
 975 
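     // Scan a single root region by applying the root region scan closure to every
     // object between the region's bottom and its top.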
 976 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
 977   // Currently, only survivors can be root regions.
 978   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 979   G1RootRegionScanClosure cl(_g1h, this);
 980 
 981   const uintx interval = PrefetchScanIntervalInBytes;
 982   HeapWord* curr = hr->bottom();
 983   const HeapWord* end = hr->top();
 984   while (curr < end) {
 985     Prefetch::read(curr, interval);
 986     oop obj = oop(curr);
 987     int size = obj->oop_iterate_size(&cl);
 988     assert(size == obj->size(), "sanity");
 989     curr += size;
 990   }
 991 }
 992 
 993 class G1CMRootRegionScanTask : public AbstractGangTask {
 994 private:
 995   G1ConcurrentMark* _cm;
 996 
 997 public:
 998   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 999     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
1000 
1001   void work(uint worker_id) {
1002     assert(Thread::current()->is_ConcurrentGC_thread(),
1003            "this should only be done by a conc GC thread");
1004 
1005     G1CMRootRegions* root_regions = _cm->root_regions();
1006     HeapRegion* hr = root_regions->claim_next();
1007     while (hr != NULL) {
1008       _cm->scanRootRegion(hr);
1009       hr = root_regions->claim_next();
1010     }
1011   }
1012 };
1013 
1014 void G1ConcurrentMark::scan_root_regions() {
1015   // scan_in_progress() will have been set to true only if there was
1016   // at least one root region to scan. So, if it's false, we
1017   // should not attempt to do any further work.
1018   if (root_regions()->scan_in_progress()) {
1019     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
1020 
1021     _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
1022                                      // We distribute work on a per-region basis, so starting
1023                                      // more threads than that is useless.
1024                                      root_regions()->num_root_regions());
1025     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1026            "Maximum number of marking threads exceeded");
1027 
1028     G1CMRootRegionScanTask task(this);
1029     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1030                         task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
1031     _parallel_workers->run_task(&task, _parallel_marking_threads);
1032 
1033     // It's possible that has_aborted() is true here without actually
1034     // aborting the survivor scan earlier. This is OK as it's
1035     // mainly used for sanity checking.
1036     root_regions()->scan_finished();
1037   }
1038 }
1039 
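     // Report the start and end of a concurrent marking cycle to the GC timer and
     // tracer, and trace the heap state before and after the cycle.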
1040 void G1ConcurrentMark::concurrent_cycle_start() {
1041   _gc_timer_cm->register_gc_start();
1042 
1043   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1044 
1045   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1046 }
1047 
1048 void G1ConcurrentMark::concurrent_cycle_end() {
1049   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1050 
1051   if (has_aborted()) {
1052     _gc_tracer_cm->report_concurrent_mode_failure();
1053   }
1054 
1055   _gc_timer_cm->register_gc_end();
1056 
1057   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1058 }
1059 
1060 void G1ConcurrentMark::mark_from_roots() {
1061   // we might be tempted to assert that:
1062   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1063   //        "inconsistent argument?");
1064   // However that wouldn't be right, because it's possible that
1065   // a safepoint is indeed in progress as a younger generation
1066   // stop-the-world GC happens even as we mark in this generation.
1067 
1068   _restart_for_overflow = false;
1069 
1070   // _g1h has _n_par_threads
1071   _parallel_marking_threads = calc_parallel_marking_threads();
1072   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1073     "Maximum number of marking threads exceeded");
1074 
1075   uint active_workers = MAX2(1U, parallel_marking_threads());
1076   assert(active_workers > 0, "Should have been set");
1077 
1078   // Setting active workers is not guaranteed since fewer
1079   // worker threads may currently exist and more may not be
1080   // available.
1081   active_workers = _parallel_workers->update_active_workers(active_workers);
1082   log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());
1083 
1084   // Parallel task terminator is set in "set_concurrency_and_phase()"
1085   set_concurrency_and_phase(active_workers, true /* concurrent */);
1086 
1087   G1CMConcurrentMarkingTask markingTask(this, cmThread());
1088   _parallel_workers->run_task(&markingTask);
1089   print_stats();
1090 }
1091 
1092 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1093   // world is stopped at this checkpoint
1094   assert(SafepointSynchronize::is_at_safepoint(),
1095          "world should be stopped");
1096 
1097   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1098 
1099   // If a full collection has happened, we shouldn't do this.
1100   if (has_aborted()) {
1101     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1102     return;
1103   }
1104 
1105   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1106 
1107   if (VerifyDuringGC) {
1108     HandleMark hm;  // handle scope
1109     g1h->prepare_for_verify();
1110     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1111   }
1112   g1h->verifier()->check_bitmaps("Remark Start");
1113 
1114   G1Policy* g1p = g1h->g1_policy();
1115   g1p->record_concurrent_mark_remark_start();
1116 
1117   double start = os::elapsedTime();
1118 
1119   checkpointRootsFinalWork();
1120 
1121   double mark_work_end = os::elapsedTime();
1122 
1123   weakRefsWork(clear_all_soft_refs);
1124 
1125   if (has_overflown()) {
1126     // We overflowed.  Restart concurrent marking.
1127     _restart_for_overflow = true;
1128 
1129     // Verify the heap w.r.t. the previous marking bitmap.
1130     if (VerifyDuringGC) {
1131       HandleMark hm;  // handle scope
1132       g1h->prepare_for_verify();
1133       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1134     }
1135 
1136     // Clear the marking state because we will be restarting
1137     // marking due to overflowing the global mark stack.
1138     reset_marking_state();
1139   } else {
1140     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1141     // We're done with marking.
1142     // This is the end of the marking cycle; we expect all
1143     // threads to have SATB queues with active set to true.
1144     satb_mq_set.set_active_all_threads(false, /* new active value */
1145                                        true /* expected_active */);
1146 
1147     if (VerifyDuringGC) {
1148       HandleMark hm;  // handle scope
1149       g1h->prepare_for_verify();
1150       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1151     }
1152     g1h->verifier()->check_bitmaps("Remark End");
1153     assert(!restart_for_overflow(), "sanity");
1154     // Completely reset the marking state since marking completed
1155     set_non_marking_state();
1156   }
1157 
1158   // Statistics
1159   double now = os::elapsedTime();
1160   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1161   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1162   _remark_times.add((now - start) * 1000.0);
1163 
1164   g1p->record_concurrent_mark_remark_end();
1165 
1166   G1CMIsAliveClosure is_alive(g1h);
1167   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1168 }
1169 
1170 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1171   G1CollectedHeap* _g1;
1172   size_t _freed_bytes;
1173   FreeRegionList* _local_cleanup_list;
1174   uint _old_regions_removed;
1175   uint _humongous_regions_removed;
1176   HRRSCleanupTask* _hrrs_cleanup_task;
1177 
1178 public:
1179   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1180                              FreeRegionList* local_cleanup_list,
1181                              HRRSCleanupTask* hrrs_cleanup_task) :
1182     _g1(g1),
1183     _freed_bytes(0),
1184     _local_cleanup_list(local_cleanup_list),
1185     _old_regions_removed(0),
1186     _humongous_regions_removed(0),
1187     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1188 
1189   size_t freed_bytes() { return _freed_bytes; }
1190   const uint old_regions_removed() { return _old_regions_removed; }
1191   const uint humongous_regions_removed() { return _humongous_regions_removed; }
1192 
1193   bool doHeapRegion(HeapRegion *hr) {
1194     if (hr->is_archive()) {
1195       return false;
1196     }
1197     _g1->reset_gc_time_stamps(hr);
1198     hr->note_end_of_marking();
1199 
1200     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1201       _freed_bytes += hr->used();
1202       hr->set_containing_set(NULL);
1203       if (hr->is_humongous()) {
1204         _humongous_regions_removed++;
1205         _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
1206       } else {
1207         _old_regions_removed++;
1208         _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
1209       }
1210     } else {
1211       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1212     }
1213 
1214     return false;
1215   }
1216 };
1217 
1218 class G1ParNoteEndTask: public AbstractGangTask {
1219   friend class G1NoteEndOfConcMarkClosure;
1220 
1221 protected:
1222   G1CollectedHeap* _g1h;
1223   FreeRegionList* _cleanup_list;
1224   HeapRegionClaimer _hrclaimer;
1225 
1226 public:
1227   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1228       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1229   }
1230 
1231   void work(uint worker_id) {
1232     FreeRegionList local_cleanup_list("Local Cleanup List");
1233     HRRSCleanupTask hrrs_cleanup_task;
1234     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1235                                            &hrrs_cleanup_task);
1236     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1237     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1238 
1239     // Now update the lists
1240     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1241     {
1242       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1243       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1244 
1245       // If we iterate over the global cleanup list at the end of
1246       // cleanup to do this printing we will not guarantee to only
1247       // generate output for the newly-reclaimed regions (the list
1248       // might not be empty at the beginning of cleanup; we might
1249       // still be working on its previous contents). So we do the
1250       // printing here, before we append the new regions to the global
1251       // cleanup list.
1252 
1253       G1HRPrinter* hr_printer = _g1h->hr_printer();
1254       if (hr_printer->is_active()) {
1255         FreeRegionListIterator iter(&local_cleanup_list);
1256         while (iter.more_available()) {
1257           HeapRegion* hr = iter.get_next();
1258           hr_printer->cleanup(hr);
1259         }
1260       }
1261 
1262       _cleanup_list->add_ordered(&local_cleanup_list);
1263       assert(local_cleanup_list.is_empty(), "post-condition");
1264 
1265       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1266     }
1267   }
1268 };
1269 
1270 void G1ConcurrentMark::cleanup() {
1271   // world is stopped at this checkpoint
1272   assert(SafepointSynchronize::is_at_safepoint(),
1273          "world should be stopped");
1274   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1275 
1276   // If a full collection has happened, we shouldn't do this.
1277   if (has_aborted()) {
1278     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1279     return;
1280   }
1281 
1282   g1h->verifier()->verify_region_sets_optional();
1283 
1284   if (VerifyDuringGC) {
1285     HandleMark hm;  // handle scope
1286     g1h->prepare_for_verify();
1287     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1288   }
1289   g1h->verifier()->check_bitmaps("Cleanup Start");
1290 
1291   G1Policy* g1p = g1h->g1_policy();
1292   g1p->record_concurrent_mark_cleanup_start();
1293 
1294   double start = os::elapsedTime();
1295 
1296   HeapRegionRemSet::reset_for_cleanup_tasks();
1297 
1298   {
1299     GCTraceTime(Debug, gc) debug("Finalize Live Data");
1300     finalize_live_data();
1301   }
1302 
1303   if (VerifyDuringGC) {
1304     GCTraceTime(Debug, gc) debug("Verify Live Data");
1305     verify_live_data();
1306   }
1307 
1308   g1h->collector_state()->set_mark_in_progress(false);
1309 
1310   double count_end = os::elapsedTime();
1311   double this_final_counting_time = (count_end - start);
1312   _total_counting_time += this_final_counting_time;
1313 
1314   if (log_is_enabled(Trace, gc, liveness)) {
1315     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1316     _g1h->heap_region_iterate(&cl);
1317   }
1318 
1319   // Install newly created mark bitMap as "prev".
1320   swapMarkBitMaps();
1321 
1322   g1h->reset_gc_time_stamp();
1323 
1324   uint n_workers = _g1h->workers()->active_workers();
1325 
1326   // Note end of marking in all heap regions.
1327   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1328   g1h->workers()->run_task(&g1_par_note_end_task);
1329   g1h->check_gc_time_stamps();
1330 
1331   if (!cleanup_list_is_empty()) {
1332     // The cleanup list is not empty, so we'll have to process it
1333     // concurrently. Notify anyone else that might be wanting free
1334     // regions that there will be more free regions coming soon.
1335     g1h->set_free_regions_coming();
1336   }
1337 
1338   // Remembered set scrubbing needs to happen before the record_concurrent_mark_cleanup_end()
1339   // call below, since it affects the metric by which we sort the heap regions.
1340   if (G1ScrubRemSets) {
1341     double rs_scrub_start = os::elapsedTime();
1342     g1h->scrub_rem_set();
1343     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1344   }
1345 
1346   // this will also free any regions totally full of garbage objects,
1347   // and sort the regions.
1348   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1349 
1350   // Statistics.
1351   double end = os::elapsedTime();
1352   _cleanup_times.add((end - start) * 1000.0);
1353 
1354   // Clean up will have freed any regions completely full of garbage.
1355   // Update the soft reference policy with the new heap occupancy.
1356   Universe::update_heap_info_at_gc();
1357 
1358   if (VerifyDuringGC) {
1359     HandleMark hm;  // handle scope
1360     g1h->prepare_for_verify();
1361     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1362   }
1363 
1364   g1h->verifier()->check_bitmaps("Cleanup End");
1365 
1366   g1h->verifier()->verify_region_sets_optional();
1367 
1368   // We need to make this be a "collection" so any collection pause that
1369   // races with it goes around and waits for completeCleanup to finish.
1370   g1h->increment_total_collections();
1371 
1372   // Clean out dead classes and update Metaspace sizes.
1373   if (ClassUnloadingWithConcurrentMark) {
1374     ClassLoaderDataGraph::purge();
1375   }
1376   MetaspaceGC::compute_new_size();
1377 
1378   // We reclaimed old regions so we should calculate the sizes to make
1379   // sure we update the old gen/space data.
1380   g1h->g1mm()->update_sizes();
1381   g1h->allocation_context_stats().update_after_mark();
1382 }
1383 
1384 void G1ConcurrentMark::complete_cleanup() {
1385   if (has_aborted()) return;
1386 
1387   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1388 
1389   _cleanup_list.verify_optional();
1390   FreeRegionList tmp_free_list("Tmp Free List");
1391 
1392   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1393                                   "cleanup list has %u entries",
1394                                   _cleanup_list.length());
1395 
1396   // No one else should be accessing the _cleanup_list at this point,
1397   // so it is not necessary to take any locks
1398   while (!_cleanup_list.is_empty()) {
1399     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1400     assert(hr != NULL, "Got NULL from a non-empty list");
1401     hr->par_clear();
1402     tmp_free_list.add_ordered(hr);
1403 
1404     // Instead of adding one region at a time to the secondary_free_list,
1405     // we accumulate them in the local list and move them a few at a
1406     // time. This also cuts down on the number of notify_all() calls
1407     // we do during this process. We'll also append the local list when
1408     // _cleanup_list is empty (which means we just removed the last
1409     // region from the _cleanup_list).
1410     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1411         _cleanup_list.is_empty()) {
1412       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1413                                       "appending %u entries to the secondary_free_list, "
1414                                       "cleanup list still has %u entries",
1415                                       tmp_free_list.length(),
1416                                       _cleanup_list.length());
1417 
1418       {
1419         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1420         g1h->secondary_free_list_add(&tmp_free_list);
1421         SecondaryFreeList_lock->notify_all();
1422       }
1423 #ifndef PRODUCT
1424       if (G1StressConcRegionFreeing) {
1425         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1426           os::sleep(Thread::current(), (jlong) 1, false);
1427         }
1428       }
1429 #endif
1430     }
1431   }
1432   assert(tmp_free_list.is_empty(), "post-condition");
1433 }
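
// The loop in complete_cleanup() above moves freed regions to the
// secondary_free_list in batches of G1SecondaryFreeListAppendLength, so the
// lock is taken and notify_all() issued once per batch rather than once per
// region. A minimal, self-contained sketch of that batching pattern in plain
// C++ (the names below are illustrative stand-ins, not HotSpot types):
//
//   #include <condition_variable>
//   #include <cstddef>
//   #include <mutex>
//   #include <vector>
//
//   std::mutex              shared_lock;
//   std::condition_variable shared_cv;
//   std::vector<int>        shared_list;  // stands in for the secondary free list
//
//   void move_in_batches(std::vector<int>& local, std::size_t batch) {
//     std::vector<int> tmp;
//     while (!local.empty()) {
//       tmp.push_back(local.back());
//       local.pop_back();
//       if (tmp.size() % batch == 0 || local.empty()) {
//         std::lock_guard<std::mutex> g(shared_lock);
//         shared_list.insert(shared_list.end(), tmp.begin(), tmp.end());
//         tmp.clear();
//         shared_cv.notify_all();          // one wakeup per batch, not per region
//       }
//     }
//   }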
1434 
1435 // Supporting Object and Oop closures for reference discovery
1436 // and processing during marking
1437 
1438 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1439   HeapWord* addr = (HeapWord*)obj;
1440   return addr != NULL &&
1441          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1442 }
1443 
1444 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1445 // Uses the G1CMTask associated with a worker thread (for serial reference
1446 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1447 // trace referent objects.
1448 //
1449 // Using the G1CMTask and embedded local queues avoids having the worker
1450 // threads operating on the global mark stack. This reduces the risk
1451 // of overflowing the stack - which we would rather avoid at this late
1452 // stage. Also, using the tasks' local queues removes the potential
1453 // for the workers to interfere with each other, which could occur if
1454 // they were operating on the global stack.
1455 
1456 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1457   G1ConcurrentMark* _cm;
1458   G1CMTask*         _task;
1459   int               _ref_counter_limit;
1460   int               _ref_counter;
1461   bool              _is_serial;
1462  public:
1463   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1464     _cm(cm), _task(task), _is_serial(is_serial),
1465     _ref_counter_limit(G1RefProcDrainInterval) {
1466     assert(_ref_counter_limit > 0, "sanity");
1467     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1468     _ref_counter = _ref_counter_limit;
1469   }
1470 
1471   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1472   virtual void do_oop(      oop* p) { do_oop_work(p); }
1473 
1474   template <class T> void do_oop_work(T* p) {
1475     if (!_cm->has_overflown()) {
1476       oop obj = oopDesc::load_decode_heap_oop(p);
1477       _task->deal_with_reference(obj);
1478       _ref_counter--;
1479 
1480       if (_ref_counter == 0) {
1481         // We have dealt with _ref_counter_limit references, pushing them
1482         // and objects reachable from them on to the local stack (and
1483         // possibly the global stack). Call G1CMTask::do_marking_step() to
1484         // process these entries.
1485         //
1486         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1487         // there's nothing more to do (i.e. we're done with the entries that
1488         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1489         // above) or we overflow.
1490         //
1491         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1492         // flag while there may still be some work to do. (See the comment at
1493         // the beginning of G1CMTask::do_marking_step() for those conditions -
1494         // one of which is reaching the specified time target.) It is only
1495         // when G1CMTask::do_marking_step() returns without setting the
1496         // has_aborted() flag that the marking step has completed.
1497         do {
1498           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1499           _task->do_marking_step(mark_step_duration_ms,
1500                                  false      /* do_termination */,
1501                                  _is_serial);
1502         } while (_task->has_aborted() && !_cm->has_overflown());
1503         _ref_counter = _ref_counter_limit;
1504       }
1505     }
1506   }
1507 };
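
// G1CMKeepAliveAndDrainClosure above works to a budget: it marks and traces
// G1RefProcDrainInterval references, then drains the marking structures via
// do_marking_step() before continuing. A sketch of that "process N, then
// drain" shape in plain C++ (overflowed(), process_one() and drain() are
// hypothetical placeholders, not HotSpot APIs):
//
//   #include <vector>
//
//   bool overflowed();             // would mirror G1ConcurrentMark::has_overflown()
//   void process_one(int ref);     // would mirror G1CMTask::deal_with_reference()
//   void drain();                  // would mirror the do_marking_step() loop
//
//   void process_all(const std::vector<int>& refs, int drain_interval) {
//     int budget = drain_interval;
//     for (int r : refs) {
//       if (overflowed()) return;  // bail out early on global overflow
//       process_one(r);
//       if (--budget == 0) {       // dealt with drain_interval references...
//         drain();                 // ...so drain local/global structures now
//         budget = drain_interval;
//       }
//     }
//   }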
1508 
1509 // 'Drain' oop closure used by both serial and parallel reference processing.
1510 // Uses the G1CMTask associated with a given worker thread (for serial
1511 // reference processing the G1CMTask for worker 0 is used). Calls the
1512 // do_marking_step routine, with an unbelievably large timeout value,
1513 // to drain the marking data structures of the remaining entries
1514 // added by the 'keep alive' oop closure above.
1515 
1516 class G1CMDrainMarkingStackClosure: public VoidClosure {
1517   G1ConcurrentMark* _cm;
1518   G1CMTask*         _task;
1519   bool              _is_serial;
1520  public:
1521   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1522     _cm(cm), _task(task), _is_serial(is_serial) {
1523     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1524   }
1525 
1526   void do_void() {
1527     do {
1528       // We call G1CMTask::do_marking_step() to completely drain the local
1529       // and global marking stacks of entries pushed by the 'keep alive'
1530       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1531       //
1532       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1533       // if there's nothing more to do (i.e. we've completely drained the
1534       // entries that were pushed as a result of applying the 'keep alive'
1535       // closure to the entries on the discovered ref lists) or we overflow
1536       // the global marking stack.
1537       //
1538       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1539       // flag while there may still be some work to do. (See the comment at
1540       // the beginning of G1CMTask::do_marking_step() for those conditions -
1541       // one of which is reaching the specified time target.) It is only
1542       // when G1CMTask::do_marking_step() returns without setting the
1543       // has_aborted() flag that the marking step has completed.
1544 
1545       _task->do_marking_step(1000000000.0 /* something very large */,
1546                              true         /* do_termination */,
1547                              _is_serial);
1548     } while (_task->has_aborted() && !_cm->has_overflown());
1549   }
1550 };
1551 
1552 // Implementation of AbstractRefProcTaskExecutor for parallel
1553 // reference processing at the end of G1 concurrent marking
1554 
1555 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1556 private:
1557   G1CollectedHeap*  _g1h;
1558   G1ConcurrentMark* _cm;
1559   WorkGang*         _workers;
1560   uint              _active_workers;
1561 
1562 public:
1563   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1564                           G1ConcurrentMark* cm,
1565                           WorkGang* workers,
1566                           uint n_workers) :
1567     _g1h(g1h), _cm(cm),
1568     _workers(workers), _active_workers(n_workers) { }
1569 
1570   // Executes the given task using concurrent marking worker threads.
1571   virtual void execute(ProcessTask& task);
1572   virtual void execute(EnqueueTask& task);
1573 };
1574 
1575 class G1CMRefProcTaskProxy: public AbstractGangTask {
1576   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1577   ProcessTask&      _proc_task;
1578   G1CollectedHeap*  _g1h;
1579   G1ConcurrentMark* _cm;
1580 
1581 public:
1582   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1583                        G1CollectedHeap* g1h,
1584                        G1ConcurrentMark* cm) :
1585     AbstractGangTask("Process reference objects in parallel"),
1586     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1587     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1588     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1589   }
1590 
1591   virtual void work(uint worker_id) {
1592     ResourceMark rm;
1593     HandleMark hm;
1594     G1CMTask* task = _cm->task(worker_id);
1595     G1CMIsAliveClosure g1_is_alive(_g1h);
1596     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1597     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1598 
1599     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1600   }
1601 };
1602 
1603 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1604   assert(_workers != NULL, "Need parallel worker threads.");
1605   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1606 
1607   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1608 
1609   // We need to reset the concurrency level before each
1610   // proxy task execution, so that the termination protocol
1611   // and overflow handling in G1CMTask::do_marking_step() knows
1612   // how many workers to wait for.
1613   _cm->set_concurrency(_active_workers);
1614   _workers->run_task(&proc_task_proxy);
1615 }
1616 
1617 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1618   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1619   EnqueueTask& _enq_task;
1620 
1621 public:
1622   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1623     AbstractGangTask("Enqueue reference objects in parallel"),
1624     _enq_task(enq_task) { }
1625 
1626   virtual void work(uint worker_id) {
1627     _enq_task.work(worker_id);
1628   }
1629 };
1630 
1631 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1632   assert(_workers != NULL, "Need parallel worker threads.");
1633   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1634 
1635   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1636 
1637   // Not strictly necessary but...
1638   //
1639   // We need to reset the concurrency level before each
1640   // proxy task execution, so that the termination protocol
1641   // and overflow handling in G1CMTask::do_marking_step() knows
1642   // how many workers to wait for.
1643   _cm->set_concurrency(_active_workers);
1644   _workers->run_task(&enq_task_proxy);
1645 }
1646 
1647 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1648   if (has_overflown()) {
1649     // Skip processing the discovered references if we have
1650     // overflown the global marking stack. Reference objects
1651     // only get discovered once so it is OK to not
1652     // de-populate the discovered reference lists. We could have,
1653     // but the only benefit would be that, when marking restarts,
1654     // fewer reference objects are discovered.
1655     return;
1656   }
1657 
1658   ResourceMark rm;
1659   HandleMark   hm;
1660 
1661   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1662 
1663   // Is alive closure.
1664   G1CMIsAliveClosure g1_is_alive(g1h);
1665 
1666   // Inner scope to exclude the cleaning of the string and symbol
1667   // tables from the displayed time.
1668   {
1669     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1670 
1671     ReferenceProcessor* rp = g1h->ref_processor_cm();
1672 
1673     // See the comment in G1CollectedHeap::ref_processing_init()
1674     // about how reference processing currently works in G1.
1675 
1676     // Set the soft reference policy
1677     rp->setup_policy(clear_all_soft_refs);
1678     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1679 
1680     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1681     // in serial reference processing. Note these closures are also
1682     // used for serially processing (by the current thread) the
1683     // JNI references during parallel reference processing.
1684     //
1685     // These closures do not need to synchronize with the worker
1686     // threads involved in parallel reference processing as these
1687     // instances are executed serially by the current thread (i.e.
1688     // reference processing is not multi-threaded and is thus
1689     // performed by the current thread instead of a gang worker).
1690     //
1691     // The gang tasks involved in parallel reference processing create
1692     // their own instances of these closures, which do their own
1693     // synchronization among themselves.
1694     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1695     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1696 
1697     // We need at least one active thread. If reference processing
1698     // is not multi-threaded we use the current (VMThread) thread,
1699     // otherwise we use the work gang from the G1CollectedHeap and
1700     // we utilize all the worker threads we can.
1701     bool processing_is_mt = rp->processing_is_mt();
1702     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
1703     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
1704 
1705     // Parallel processing task executor.
1706     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
1707                                               g1h->workers(), active_workers);
1708     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1709 
1710     // Set the concurrency level. The phase was already set prior to
1711     // executing the remark task.
1712     set_concurrency(active_workers);
1713 
1714     // Set the degree of MT processing here.  If the discovery was done MT,
1715     // the number of threads involved during discovery could differ from
1716     // the number of active workers.  This is OK as long as the discovered
1717     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1718     rp->set_active_mt_degree(active_workers);
1719 
1720     // Process the weak references.
1721     const ReferenceProcessorStats& stats =
1722         rp->process_discovered_references(&g1_is_alive,
1723                                           &g1_keep_alive,
1724                                           &g1_drain_mark_stack,
1725                                           executor,
1726                                           _gc_timer_cm);
1727     _gc_tracer_cm->report_gc_reference_stats(stats);
1728 
1729     // The do_oop work routines of the keep_alive and drain_marking_stack
1730     // oop closures will set the has_overflown flag if we overflow the
1731     // global marking stack.
1732 
1733     assert(has_overflown() || _global_mark_stack.is_empty(),
1734             "Mark stack should be empty (unless it has overflown)");
1735 
1736     assert(rp->num_q() == active_workers, "why not");
1737 
1738     rp->enqueue_discovered_references(executor);
1739 
1740     rp->verify_no_references_recorded();
1741     assert(!rp->discovery_enabled(), "Post condition");
1742   }
1743 
1744   if (has_overflown()) {
1745     // We can not trust g1_is_alive if the marking stack overflowed
1746     return;
1747   }
1748 
1749   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1750 
1751   // Unload Klasses, String, Symbols, Code Cache, etc.
1752   if (ClassUnloadingWithConcurrentMark) {
1753     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1754     bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1755     g1h->complete_cleaning(&g1_is_alive, purged_classes);
1756   } else {
1757     GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1758     // No need to clean string table and symbol table as they are treated as strong roots when
1759     // class unloading is disabled.
1760     g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1761 
1762   }
1763 }
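
// weakRefsWork() above only hands the reference processor a non-NULL executor
// when processing is multi-threaded; a NULL executor makes it fall back to the
// serial 'keep alive' and 'drain' closures, and the worker count is clamped to
// [1, max]. That selection logic in isolation, in plain C++ (Executor and the
// parameter names are illustrative stand-ins):
//
//   #include <algorithm>
//
//   struct Executor { /* would run the proxy tasks on a work gang */ };
//
//   Executor* choose_executor(bool processing_is_mt,
//                             unsigned gang_workers,
//                             unsigned max_workers,
//                             Executor* par_executor,
//                             unsigned* active_workers_out) {
//     unsigned active = processing_is_mt ? gang_workers : 1u;
//     *active_workers_out = std::max(std::min(active, max_workers), 1u);
//     // NULL here means: run the serial closures on the current thread.
//     return processing_is_mt ? par_executor : nullptr;
//   }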
1764 
1765 void G1ConcurrentMark::swapMarkBitMaps() {
1766   G1CMBitMapRO* temp = _prevMarkBitMap;
1767   _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
1768   _nextMarkBitMap    = (G1CMBitMap*)  temp;
1769 }
1770 
1771 // Closure for marking entries in SATB buffers.
1772 class G1CMSATBBufferClosure : public SATBBufferClosure {
1773 private:
1774   G1CMTask* _task;
1775   G1CollectedHeap* _g1h;
1776 
1777   // This is very similar to G1CMTask::deal_with_reference, but with
1778   // more relaxed requirements for the argument, so this must be more
1779   // circumspect about treating the argument as an object.
1780   void do_entry(void* entry) const {
1781     _task->increment_refs_reached();
1782     HeapRegion* hr = _g1h->heap_region_containing(entry);
1783     if (entry < hr->next_top_at_mark_start()) {
1784       // Until we get here, we don't know whether entry refers to a valid
1785       // object; it could instead have been a stale reference.
1786       oop obj = static_cast<oop>(entry);
1787       assert(obj->is_oop(true /* ignore mark word */),
1788              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
1789       _task->make_reference_grey(obj);
1790     }
1791   }
1792 
1793 public:
1794   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1795     : _task(task), _g1h(g1h) { }
1796 
1797   virtual void do_buffer(void** buffer, size_t size) {
1798     for (size_t i = 0; i < size; ++i) {
1799       do_entry(buffer[i]);
1800     }
1801   }
1802 };
1803 
1804 class G1RemarkThreadsClosure : public ThreadClosure {
1805   G1CMSATBBufferClosure _cm_satb_cl;
1806   G1CMOopClosure _cm_cl;
1807   MarkingCodeBlobClosure _code_cl;
1808   int _thread_parity;
1809 
1810  public:
1811   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1812     _cm_satb_cl(task, g1h),
1813     _cm_cl(g1h, g1h->concurrent_mark(), task),
1814     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1815     _thread_parity(Threads::thread_claim_parity()) {}
1816 
1817   void do_thread(Thread* thread) {
1818     if (thread->is_Java_thread()) {
1819       if (thread->claim_oops_do(true, _thread_parity)) {
1820         JavaThread* jt = (JavaThread*)thread;
1821 
1822         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1823         // however, oops reachable from nmethods have very complex lifecycles:
1824         // * Alive if on the stack of an executing method
1825         // * Weakly reachable otherwise
1826         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1827         // kept live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1828         jt->nmethods_do(&_code_cl);
1829 
1830         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1831       }
1832     } else if (thread->is_VM_thread()) {
1833       if (thread->claim_oops_do(true, _thread_parity)) {
1834         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1835       }
1836     }
1837   }
1838 };
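
// G1RemarkThreadsClosure above depends on claim_oops_do() and the global claim
// parity so that each thread's nmethods and SATB buffer are processed by
// exactly one remark worker. The claiming idea, sketched with std::atomic
// (ExampleThread and its field are hypothetical, not the HotSpot Thread class):
//
//   #include <atomic>
//
//   struct ExampleThread {
//     std::atomic<int> oops_do_parity{0};
//
//     // Returns true for exactly one caller per value of 'global_parity'.
//     bool claim(int global_parity) {
//       int seen = oops_do_parity.load(std::memory_order_relaxed);
//       return seen != global_parity &&
//              oops_do_parity.compare_exchange_strong(seen, global_parity);
//     }
//   };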
1839 
1840 class G1CMRemarkTask: public AbstractGangTask {
1841 private:
1842   G1ConcurrentMark* _cm;
1843 public:
1844   void work(uint worker_id) {
1845     // Since all available tasks are actually started, we should
1846     // only proceed if we're supposed to be active.
1847     if (worker_id < _cm->active_tasks()) {
1848       G1CMTask* task = _cm->task(worker_id);
1849       task->record_start_time();
1850       {
1851         ResourceMark rm;
1852         HandleMark hm;
1853 
1854         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1855         Threads::threads_do(&threads_f);
1856       }
1857 
1858       do {
1859         task->do_marking_step(1000000000.0 /* something very large */,
1860                               true         /* do_termination       */,
1861                               false        /* is_serial            */);
1862       } while (task->has_aborted() && !_cm->has_overflown());
1863       // If we overflow, then we do not want to restart. We instead
1864       // want to abort remark and do concurrent marking again.
1865       task->record_end_time();
1866     }
1867   }
1868 
1869   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1870     AbstractGangTask("Par Remark"), _cm(cm) {
1871     _cm->terminator()->reset_for_reuse(active_workers);
1872   }
1873 };
1874 
1875 void G1ConcurrentMark::checkpointRootsFinalWork() {
1876   ResourceMark rm;
1877   HandleMark   hm;
1878   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1879 
1880   GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1881 
1882   g1h->ensure_parsability(false);
1883 
1884   // this is remark, so we'll use up all active threads
1885   uint active_workers = g1h->workers()->active_workers();
1886   set_concurrency_and_phase(active_workers, false /* concurrent */);
1887   // Leave _parallel_marking_threads at its
1888   // value originally calculated in the G1ConcurrentMark
1889   // constructor and pass the number of active workers
1890   // to the gang through the task.
1891 
1892   {
1893     StrongRootsScope srs(active_workers);
1894 
1895     G1CMRemarkTask remarkTask(this, active_workers);
1896     // We will start all available threads, even if we decide that the
1897     // active_workers will be fewer. The extra ones will just bail out
1898     // immediately.
1899     g1h->workers()->run_task(&remarkTask);
1900   }
1901 
1902   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1903   guarantee(has_overflown() ||
1904             satb_mq_set.completed_buffers_num() == 0,
1905             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1906             BOOL_TO_STR(has_overflown()),
1907             satb_mq_set.completed_buffers_num());
1908 
1909   print_stats();
1910 }
1911 
1912 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
1913   // Note we are overriding the read-only view of the prev map here, via
1914   // the cast.
1915   ((G1CMBitMap*)_prevMarkBitMap)->clear_range(mr);
1916 }
1917 
1918 HeapRegion*
1919 G1ConcurrentMark::claim_region(uint worker_id) {
1920   // "checkpoint" the finger
1921   HeapWord* finger = _finger;
1922 
1923   // _heap_end will not change underneath our feet; it only changes at
1924   // yield points.
1925   while (finger < _heap_end) {
1926     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1927 
1928     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1929     // Make sure that the reads below do not float before loading curr_region.
1930     OrderAccess::loadload();
1931     // The heap_region_containing() call above may return NULL as we always
1932     // claim regions up to the end of the heap. In this case, just jump to the next region.
1933     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1934 
1935     // Is the gap between reading the finger and doing the CAS too long?
1936     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
1937     if (res == finger && curr_region != NULL) {
1938       // we succeeded
1939       HeapWord*   bottom        = curr_region->bottom();
1940       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1941 
1942       // Notice that _finger == end cannot be guaranteed here, since
1943       // someone else might have moved the finger even further.
1944       assert(_finger >= end, "the finger should have moved forward");
1945 
1946       if (limit > bottom) {
1947         return curr_region;
1948       } else {
1949         assert(limit == bottom,
1950                "the region limit should be at bottom");
1951         // we return NULL and the caller should try calling
1952         // claim_region() again.
1953         return NULL;
1954       }
1955     } else {
1956       assert(_finger > finger, "the finger should have moved forward");
1957       // read it again
1958       finger = _finger;
1959     }
1960   }
1961 
1962   return NULL;
1963 }
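
// claim_region() above hands out regions lock-free: a worker reads the global
// finger, computes the end of the region the finger points into, and tries to
// CAS the finger forward; the CAS winner owns that region, losers re-read the
// finger and retry. The same protocol over plain addresses, sketched with
// std::atomic (region_size/heap_end are illustrative, and the NULL-region and
// empty-region cases handled above are omitted):
//
//   #include <atomic>
//   #include <cstdint>
//
//   std::atomic<std::uintptr_t> g_finger{0};   // global claim pointer
//
//   // Returns the claimed region start, or 0 once the range is exhausted.
//   std::uintptr_t claim(std::uintptr_t heap_end, std::uintptr_t region_size) {
//     std::uintptr_t finger = g_finger.load();
//     while (finger < heap_end) {
//       std::uintptr_t end = finger + region_size;
//       // The winner moves the finger from 'finger' to 'end' and owns [finger, end).
//       if (g_finger.compare_exchange_strong(finger, end)) {
//         return finger;
//       }
//       // On failure compare_exchange_strong reloads 'finger'; retry from there.
//     }
//     return 0;
//   }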
1964 
1965 #ifndef PRODUCT
1966 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1967 private:
1968   G1CollectedHeap* _g1h;
1969   const char* _phase;
1970   int _info;
1971 
1972 public:
1973   VerifyNoCSetOops(const char* phase, int info = -1) :
1974     _g1h(G1CollectedHeap::heap()),
1975     _phase(phase),
1976     _info(info)
1977   { }
1978 
1979   void operator()(G1TaskQueueEntry task_entry) const {
1980     if (task_entry.is_array_slice()) {
1981       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1982       return;
1983     }
1984     guarantee(task_entry.obj()->is_oop(),
1985               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1986               p2i(task_entry.obj()), _phase, _info);
1987     guarantee(!_g1h->is_in_cset(task_entry.obj()),
1988               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1989               p2i(task_entry.obj()), _phase, _info);
1990   }
1991 };
1992 
1993 void G1ConcurrentMark::verify_no_cset_oops() {
1994   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1995   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
1996     return;
1997   }
1998 
1999   // Verify entries on the global mark stack
2000   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
2001 
2002   // Verify entries on the task queues
2003   for (uint i = 0; i < _max_worker_id; ++i) {
2004     G1CMTaskQueue* queue = _task_queues->queue(i);
2005     queue->iterate(VerifyNoCSetOops("Queue", i));
2006   }
2007 
2008   // Verify the global finger
2009   HeapWord* global_finger = finger();
2010   if (global_finger != NULL && global_finger < _heap_end) {
2011     // Since we always iterate over all regions, we might get a NULL HeapRegion
2012     // here.
2013     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2014     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2015               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2016               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2017   }
2018 
2019   // Verify the task fingers
2020   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2021   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2022     G1CMTask* task = _tasks[i];
2023     HeapWord* task_finger = task->finger();
2024     if (task_finger != NULL && task_finger < _heap_end) {
2025       // See above note on the global finger verification.
2026       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2027       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2028                 !task_hr->in_collection_set(),
2029                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2030                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2031     }
2032   }
2033 }
2034 #endif // PRODUCT
2035 void G1ConcurrentMark::create_live_data() {
2036   _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap);
2037 }
2038 
2039 void G1ConcurrentMark::finalize_live_data() {
2040   _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap);
2041 }
2042 
2043 void G1ConcurrentMark::verify_live_data() {
2044   _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap);
2045 }
2046 
2047 void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
2048   _g1h->g1_rem_set()->clear_card_live_data(workers);
2049 }
2050 
2051 #ifdef ASSERT
2052 void G1ConcurrentMark::verify_live_data_clear() {
2053   _g1h->g1_rem_set()->verify_card_live_data_is_clear();
2054 }
2055 #endif
2056 
2057 void G1ConcurrentMark::print_stats() {
2058   if (!log_is_enabled(Debug, gc, stats)) {
2059     return;
2060   }
2061   log_debug(gc, stats)("---------------------------------------------------------------------");
2062   for (size_t i = 0; i < _active_tasks; ++i) {
2063     _tasks[i]->print_stats();
2064     log_debug(gc, stats)("---------------------------------------------------------------------");
2065   }
2066 }
2067 
2068 void G1ConcurrentMark::abort() {
2069   if (!cmThread()->during_cycle() || _has_aborted) {
2070     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2071     return;
2072   }
2073 
2074   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2075   // concurrent bitmap clearing.
2076   {
2077     GCTraceTime(Debug, gc)("Clear Next Bitmap");
2078     clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
2079   }
2080   // Note we cannot clear the previous marking bitmap here
2081   // since VerifyDuringGC verifies the objects marked during
2082   // a full GC against the previous bitmap.
2083 
2084   {
2085     GCTraceTime(Debug, gc)("Clear Live Data");
2086     clear_live_data(_g1h->workers());
2087   }
2088   DEBUG_ONLY({
2089     GCTraceTime(Debug, gc)("Verify Live Data Clear");
2090     verify_live_data_clear();
2091   })
2092   // Empty mark stack
2093   reset_marking_state();
2094   for (uint i = 0; i < _max_worker_id; ++i) {
2095     _tasks[i]->clear_region_fields();
2096   }
2097   _first_overflow_barrier_sync.abort();
2098   _second_overflow_barrier_sync.abort();
2099   _has_aborted = true;
2100 
2101   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2102   satb_mq_set.abandon_partial_marking();
2103   // This can be called either during or outside marking; we'll read
2104   // the expected_active value from the SATB queue set.
2105   satb_mq_set.set_active_all_threads(
2106                                  false, /* new active value */
2107                                  satb_mq_set.is_active() /* expected_active */);
2108 }
2109 
2110 static void print_ms_time_info(const char* prefix, const char* name,
2111                                NumberSeq& ns) {
2112   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2113                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2114   if (ns.num() > 0) {
2115     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2116                            prefix, ns.sd(), ns.maximum());
2117   }
2118 }
2119 
2120 void G1ConcurrentMark::print_summary_info() {
2121   Log(gc, marking) log;
2122   if (!log.is_trace()) {
2123     return;
2124   }
2125 
2126   log.trace(" Concurrent marking:");
2127   print_ms_time_info("  ", "init marks", _init_times);
2128   print_ms_time_info("  ", "remarks", _remark_times);
2129   {
2130     print_ms_time_info("     ", "final marks", _remark_mark_times);
2131     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2132 
2133   }
2134   print_ms_time_info("  ", "cleanups", _cleanup_times);
2135   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2136             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2137   if (G1ScrubRemSets) {
2138     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2139               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2140   }
2141   log.trace("  Total stop_world time = %8.2f s.",
2142             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2143   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2144             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2145 }
2146 
2147 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2148   _parallel_workers->print_worker_threads_on(st);
2149 }
2150 
2151 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2152   _parallel_workers->threads_do(tc);
2153 }
2154 
2155 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2156   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2157       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2158   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2159   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2160 }
2161 
2162 // Closure for iteration over bitmaps
2163 class G1CMBitMapClosure : public BitMapClosure {
2164 private:
2165   // the bitmap that is being iterated over
2166   G1CMBitMap*                 _nextMarkBitMap;
2167   G1ConcurrentMark*           _cm;
2168   G1CMTask*                   _task;
2169 
2170 public:
2171   G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2172     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2173 
2174   bool do_bit(size_t offset) {
2175     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2176     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2177     assert(addr < _cm->finger(), "invariant");
2178     assert(addr >= _task->finger(), "invariant");
2179 
2180     // We move that task's local finger along.
2181     _task->move_finger_to(addr);
2182 
2183     _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
2184     // we only partially drain the local queue and global stack
2185     _task->drain_local_queue(true);
2186     _task->drain_global_stack(true);
2187 
2188     // if the has_aborted flag has been raised, we need to bail out of
2189     // the iteration
2190     return !_task->has_aborted();
2191   }
2192 };
2193 
2194 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2195   ReferenceProcessor* result = g1h->ref_processor_cm();
2196   assert(result != NULL, "CM reference processor should not be NULL");
2197   return result;
2198 }
2199 
2200 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2201                                G1ConcurrentMark* cm,
2202                                G1CMTask* task)
2203   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2204     _g1h(g1h), _cm(cm), _task(task)
2205 { }
2206 
2207 void G1CMTask::setup_for_region(HeapRegion* hr) {
2208   assert(hr != NULL,
2209         "claim_region() should have filtered out NULL regions");
2210   _curr_region  = hr;
2211   _finger       = hr->bottom();
2212   update_region_limit();
2213 }
2214 
2215 void G1CMTask::update_region_limit() {
2216   HeapRegion* hr            = _curr_region;
2217   HeapWord* bottom          = hr->bottom();
2218   HeapWord* limit           = hr->next_top_at_mark_start();
2219 
2220   if (limit == bottom) {
2221     // The region was collected underneath our feet.
2222     // We set the finger to bottom to ensure that the bitmap
2223     // iteration that will follow this will not do anything.
2224     // (this is not a condition that holds when we set the region up,
2225     // as the region is not supposed to be empty in the first place)
2226     _finger = bottom;
2227   } else if (limit >= _region_limit) {
2228     assert(limit >= _finger, "peace of mind");
2229   } else {
2230     assert(limit < _region_limit, "only way to get here");
2231     // This can happen under some pretty unusual circumstances.  An
2232     // evacuation pause empties the region underneath our feet (NTAMS
2233     // at bottom). We then do some allocation in the region (NTAMS
2234     // stays at bottom), followed by the region being used as a GC
2235     // alloc region (NTAMS will move to top() and the objects
2236     // originally below it will be grayed). All objects now marked in
2237     // the region are explicitly grayed, if below the global finger,
2238     // and we do not need in fact to scan anything else. So, we simply
2239     // set _finger to be limit to ensure that the bitmap iteration
2240     // doesn't do anything.
2241     _finger = limit;
2242   }
2243 
2244   _region_limit = limit;
2245 }
2246 
2247 void G1CMTask::giveup_current_region() {
2248   assert(_curr_region != NULL, "invariant");
2249   clear_region_fields();
2250 }
2251 
2252 void G1CMTask::clear_region_fields() {
2253   // Set these three fields to values that indicate that we're not
2254   // holding on to a region.
2255   _curr_region   = NULL;
2256   _finger        = NULL;
2257   _region_limit  = NULL;
2258 }
2259 
2260 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2261   if (cm_oop_closure == NULL) {
2262     assert(_cm_oop_closure != NULL, "invariant");
2263   } else {
2264     assert(_cm_oop_closure == NULL, "invariant");
2265   }
2266   _cm_oop_closure = cm_oop_closure;
2267 }
2268 
2269 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2270   guarantee(nextMarkBitMap != NULL, "invariant");
2271   _nextMarkBitMap                = nextMarkBitMap;
2272   clear_region_fields();
2273 
2274   _calls                         = 0;
2275   _elapsed_time_ms               = 0.0;
2276   _termination_time_ms           = 0.0;
2277   _termination_start_time_ms     = 0.0;
2278 }
2279 
2280 bool G1CMTask::should_exit_termination() {
2281   regular_clock_call();
2282   // This is called when we are in the termination protocol. We should
2283   // quit if, for some reason, this task wants to abort or the global
2284   // stack is not empty (this means that we can get work from it).
2285   return !_cm->mark_stack_empty() || has_aborted();
2286 }
2287 
2288 void G1CMTask::reached_limit() {
2289   assert(_words_scanned >= _words_scanned_limit ||
2290          _refs_reached >= _refs_reached_limit ,
2291          "shouldn't have been called otherwise");
2292   regular_clock_call();
2293 }
2294 
2295 void G1CMTask::regular_clock_call() {
2296   if (has_aborted()) return;
2297 
2298   // First, we need to recalculate the words scanned and refs reached
2299   // limits for the next clock call.
2300   recalculate_limits();
2301 
2302   // During the regular clock call we do the following:
2303 
2304   // (1) If an overflow has been flagged, then we abort.
2305   if (_cm->has_overflown()) {
2306     set_has_aborted();
2307     return;
2308   }
2309 
2310   // If we are not concurrent (i.e. we're doing remark) we don't need
2311   // to check anything else. The other steps are only needed during
2312   // the concurrent marking phase.
2313   if (!concurrent()) return;
2314 
2315   // (2) If marking has been aborted for Full GC, then we also abort.
2316   if (_cm->has_aborted()) {
2317     set_has_aborted();
2318     return;
2319   }
2320 
2321   double curr_time_ms = os::elapsedVTime() * 1000.0;
2322 
2323   // (3) We check whether we should yield. If we have to, then we abort.
2324   if (SuspendibleThreadSet::should_yield()) {
2325     // We should yield. To do this we abort the task. The caller is
2326     // responsible for yielding.
2327     set_has_aborted();
2328     return;
2329   }
2330 
2331   // (4) We check whether we've reached our time quota. If we have,
2332   // then we abort.
2333   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2334   if (elapsed_time_ms > _time_target_ms) {
2335     set_has_aborted();
2336     _has_timed_out = true;
2337     return;
2338   }
2339 
2340   // (5) Finally, we check whether there are enough completed SATB
2341   // buffers available for processing. If there are, we abort.
2342   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2343   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2344     // We do need to process SATB buffers, so we'll abort and restart
2345     // the marking task to do so.
2346     set_has_aborted();
2347     return;
2348   }
2349 }
2350 
2351 void G1CMTask::recalculate_limits() {
2352   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2353   _words_scanned_limit      = _real_words_scanned_limit;
2354 
2355   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2356   _refs_reached_limit       = _real_refs_reached_limit;
2357 }
2358 
2359 void G1CMTask::decrease_limits() {
2360   // This is called when we believe that we're going to do an infrequent
2361   // operation which will increase the per byte scanned cost (i.e. move
2362   // entries to/from the global stack). It basically tries to decrease the
2363   // scanning limit so that the clock is called earlier.
2364 
2365   _words_scanned_limit = _real_words_scanned_limit -
2366     3 * words_scanned_period / 4;
2367   _refs_reached_limit  = _real_refs_reached_limit -
2368     3 * refs_reached_period / 4;
2369 }
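
// A worked example of the limit bookkeeping above, assuming a
// words_scanned_period of 12 * 1024 = 12288 words and 5120 words already
// scanned when recalculate_limits() runs (the concrete numbers are
// illustrative only):
//
//   _real_words_scanned_limit = 5120 + 12288           = 17408
//   _words_scanned_limit      = 17408                    (initially the same)
//
//   // after decrease_limits(), e.g. following a global stack transfer:
//   _words_scanned_limit      = 17408 - 3 * 12288 / 4  = 8192
//
// i.e. the next regular_clock_call() now triggers after roughly a quarter of
// a period (3072 more words) instead of a full period (12288 more words).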
2370 
2371 void G1CMTask::move_entries_to_global_stack() {
2372   // Local array where we'll store the entries that will be popped
2373   // from the local queue.
2374   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2375 
2376   size_t n = 0;
2377   G1TaskQueueEntry task_entry;
2378   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2379     buffer[n] = task_entry;
2380     ++n;
2381   }
2382   if (n < G1CMMarkStack::EntriesPerChunk) {
2383     buffer[n] = G1TaskQueueEntry();
2384   }
2385 
2386   if (n > 0) {
2387     if (!_cm->mark_stack_push(buffer)) {
2388       set_has_aborted();
2389     }
2390   }
2391 
2392   // This operation was quite expensive, so decrease the limits.
2393   decrease_limits();
2394 }
2395 
2396 bool G1CMTask::get_entries_from_global_stack() {
2397   // Local array where we'll store the entries that will be popped
2398   // from the global stack.
2399   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2400 
2401   if (!_cm->mark_stack_pop(buffer)) {
2402     return false;
2403   }
2404 
2405   // We did actually pop at least one entry.
2406   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2407     G1TaskQueueEntry task_entry = buffer[i];
2408     if (task_entry.is_null()) {
2409       break;
2410     }
2411     assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2412     bool success = _task_queue->push(task_entry);
2413     // We only call this when the local queue is empty or under a
2414     // given target limit. So, we do not expect this push to fail.
2415     assert(success, "invariant");
2416   }
2417 
2418   // This operation was quite expensive, so decrease the limits
2419   decrease_limits();
2420   return true;
2421 }
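
// move_entries_to_global_stack() and get_entries_from_global_stack() above
// always transfer whole chunks of EntriesPerChunk entries, padding a partially
// filled chunk with a null entry that acts as a terminator on the consuming
// side. The "fill, terminate, push" half of that shape in plain C++ (Entry,
// pop_local() and push_chunk() are simplified stand-ins, not HotSpot types):
//
//   #include <cstddef>
//
//   struct Entry {
//     void* p = nullptr;
//     bool is_null() const { return p == nullptr; }
//   };
//   const std::size_t EntriesPerChunk = 64;   // illustrative chunk capacity
//
//   bool pop_local(Entry& out);                             // local queue pop
//   bool push_chunk(const Entry (&buf)[EntriesPerChunk]);   // global stack push
//
//   bool move_chunk_to_global() {
//     Entry buffer[EntriesPerChunk];
//     std::size_t n = 0;
//     while (n < EntriesPerChunk && pop_local(buffer[n])) {
//       ++n;
//     }
//     if (n < EntriesPerChunk) {
//       buffer[n] = Entry();   // null entry terminates a partially filled chunk
//     }
//     return n == 0 || push_chunk(buffer);   // a failed push forces an abort
//   }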
2422 
2423 void G1CMTask::drain_local_queue(bool partially) {
2424   if (has_aborted()) {
2425     return;
2426   }
2427 
2428   // Decide what the target size is, depending whether we're going to
2429   // drain it partially (so that other tasks can steal if they run out
2430   // of things to do) or totally (at the very end).
2431   size_t target_size;
2432   if (partially) {
2433     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2434   } else {
2435     target_size = 0;
2436   }
2437 
2438   if (_task_queue->size() > target_size) {
2439     G1TaskQueueEntry entry;
2440     bool ret = _task_queue->pop_local(entry);
2441     while (ret) {
2442       scan_task_entry(entry);
2443       if (_task_queue->size() <= target_size || has_aborted()) {
2444         ret = false;
2445       } else {
2446         ret = _task_queue->pop_local(entry);
2447       }
2448     }
2449   }
2450 }
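
// The target size used by drain_local_queue() above is what keeps work
// available for stealing: a partial drain keeps roughly a third of the queue
// capacity (bounded by GCDrainStackTargetSize), while a total drain empties
// the queue. That selection in isolation, in plain C++ (parameter names are
// illustrative):
//
//   #include <algorithm>
//   #include <cstddef>
//
//   std::size_t drain_target(bool partially,
//                            std::size_t queue_capacity,
//                            std::size_t drain_stack_target) {
//     // Partial drain: leave some entries behind so other tasks can steal.
//     // Total drain: pop until the queue is empty.
//     return partially ? std::min(queue_capacity / 3, drain_stack_target) : 0;
//   }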
2451 
2452 void G1CMTask::drain_global_stack(bool partially) {
2453   if (has_aborted()) return;
2454 
2455   // We have a policy to drain the local queue before we attempt to
2456   // drain the global stack.
2457   assert(partially || _task_queue->size() == 0, "invariant");
2458 
2459   // Decide what the target size is, depending whether we're going to
2460   // drain it partially (so that other tasks can steal if they run out
2461   // of things to do) or totally (at the very end).
2462   // Notice that when draining the global mark stack partially, due to the raciness
2463   // of the mark stack size update we might in fact drop below the target. But
2464   // this is not a problem.
2465   // In case of total draining, we simply process until the global mark stack is
2466   // totally empty, disregarding the size counter.
2467   if (partially) {
2468     size_t const target_size = _cm->partial_mark_stack_size_target();
2469     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2470       if (get_entries_from_global_stack()) {
2471         drain_local_queue(partially);
2472       }
2473     }
2474   } else {
2475     while (!has_aborted() && get_entries_from_global_stack()) {
2476       drain_local_queue(partially);
2477     }
2478   }
2479 }
2480 
2481 // The SATB queue code makes several assumptions about whether to call the par or
2482 // non-par versions of the methods. This is why some of the code is
2483 // replicated. We should really get rid of the single-threaded version
2484 // of the code to simplify things.
2485 void G1CMTask::drain_satb_buffers() {
2486   if (has_aborted()) return;
2487 
2488   // We set this so that the regular clock knows that we're in the
2489   // middle of draining buffers and doesn't set the abort flag when it
2490   // notices that SATB buffers are available for draining. It'd be
2491   // very counterproductive if it did that. :-)
2492   _draining_satb_buffers = true;
2493 
2494   G1CMSATBBufferClosure satb_cl(this, _g1h);
2495   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2496 
2497   // This keeps claiming and applying the closure to completed buffers
2498   // until we run out of buffers or we need to abort.
2499   while (!has_aborted() &&
2500          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2501     regular_clock_call();
2502   }
2503 
2504   _draining_satb_buffers = false;
2505 
2506   assert(has_aborted() ||
2507          concurrent() ||
2508          satb_mq_set.completed_buffers_num() == 0, "invariant");
2509 
2510   // Again, this was a potentially expensive operation, so decrease the
2511   // limits to get the regular clock call earlier.
2512   decrease_limits();
2513 }
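
// drain_satb_buffers() above brackets its claim loop with the
// _draining_satb_buffers flag so that the regular clock does not immediately
// re-abort the task merely because completed buffers are still pending while
// we are the ones draining them. The guard-plus-loop shape with hypothetical
// placeholders (aborted(), claim_and_apply(), clock() are not HotSpot APIs):
//
//   bool g_draining = false;
//
//   bool aborted();
//   bool claim_and_apply();   // claims one completed buffer, applies the closure
//   void clock();             // the regular clock check between buffers
//
//   void drain_buffers() {
//     g_draining = true;      // tell the clock that we are draining on purpose
//     while (!aborted() && claim_and_apply()) {
//       clock();              // bounds the time needed to react to an abort
//     }
//     g_draining = false;
//   }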
2514 
2515 void G1CMTask::print_stats() {
2516   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2517                        _worker_id, _calls);
2518   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2519                        _elapsed_time_ms, _termination_time_ms);
2520   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2521                        _step_times_ms.num(), _step_times_ms.avg(),
2522                        _step_times_ms.sd());
2523   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
2524                        _step_times_ms.maximum(), _step_times_ms.sum());
2525 }
2526 
2527 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2528   return _task_queues->steal(worker_id, hash_seed, task_entry);
2529 }
2530 
2531 /*****************************************************************************
2532 
2533     The do_marking_step(time_target_ms, ...) method is the building
2534     block of the parallel marking framework. It can be called in parallel
2535     with other invocations of do_marking_step() on different tasks
2536     (but only one per task, obviously) and concurrently with the
2537     mutator threads, or during remark, hence it eliminates the need
2538     for two versions of the code. When called during remark, it will
2539     pick up from where the task left off during the concurrent marking
2540     phase. Interestingly, tasks are also claimable during evacuation
2541     pauses, since do_marking_step() ensures that it aborts before
2542     it needs to yield.
2543 
2544     The data structures that it uses to do marking work are the
2545     following:
2546 
2547       (1) Marking Bitmap. If there are gray objects that appear only
2548       on the bitmap (this happens either when dealing with an overflow
2549       or when the initial marking phase has simply marked the roots
2550       and didn't push them on the stack), then tasks claim heap
2551       regions whose bitmap they then scan to find gray objects. A
2552       global finger indicates where the end of the last claimed region
2553       is. A local finger indicates how far into the region a task has
2554       scanned. The two fingers are used to determine how to gray an
2555       object (i.e. whether simply marking it is OK, as it will be
2556       visited by a task in the future, or whether it also needs to be
2557       pushed on a stack).
2558 
2559       (2) Local Queue. The local queue of the task which is accessed
2560       reasonably efficiently by the task. Other tasks can steal from
2561       it when they run out of work. Throughout the marking phase, a
2562       task attempts to keep its local queue short but not totally
2563       empty, so that entries are available for stealing by other
2564       tasks. Only when there is no more work will a task totally
2565       drain its local queue.
2566 
2567       (3) Global Mark Stack. This handles local queue overflow. During
2568       marking only sets of entries are moved between it and the local
2569       queues, as access to it requires a mutex and more fine-grained
2570       interaction with it might cause contention. If it
2571       overflows, then the marking phase should restart and iterate
2572       over the bitmap to identify gray objects. Throughout the marking
2573       phase, tasks attempt to keep the global mark stack at a small
2574       length but not totally empty, so that entries are available for
2575       popping by other tasks. Only when there is no more work will
2576       tasks totally drain the global mark stack.
2577 
2578       (4) SATB Buffer Queue. This is where completed SATB buffers are
2579       made available. Buffers are regularly removed from this queue
2580       and scanned for roots, so that the queue doesn't get too
2581       long. During remark, all completed buffers are processed, as
2582       well as the filled in parts of any uncompleted buffers.
2583 
2584     The do_marking_step() method tries to abort when the time target
2585     has been reached. There are a few other cases when the
2586     do_marking_step() method also aborts:
2587 
2588       (1) When the marking phase has been aborted (after a Full GC).
2589 
2590       (2) When a global overflow (on the global stack) has been
2591       triggered. Before the task aborts, it will actually sync up with
2592       the other tasks to ensure that all the marking data structures
2593       (local queues, stacks, fingers etc.)  are re-initialized so that
2594       when do_marking_step() completes, the marking phase can
2595       immediately restart.
2596 
2597       (3) When enough completed SATB buffers are available. The
2598       do_marking_step() method only tries to drain SATB buffers right
2599       at the beginning. So, if enough buffers are available, the
2600       marking step aborts and the SATB buffers are processed at
2601       the beginning of the next invocation.
2602 
2603       (4) To yield. When we have to yield, we abort and yield
2604       right at the end of do_marking_step(). This saves us from a lot
2605       of hassle as, by yielding, we might allow a Full GC. If this
2606       happens then objects will be compacted underneath our feet, the
2607       heap might shrink, etc. We save checking for this by just
2608       aborting and doing the yield right at the end.
2609 
2610     From the above it follows that the do_marking_step() method should
2611     be called in a loop (or, otherwise, regularly) until it completes.
2612 
2613     If a marking step completes without its has_aborted() flag being
2614     true, it means it has completed the current marking phase (and
2615     also all other marking tasks have done so and have all synced up).
2616 
2617     A method called regular_clock_call() is invoked "regularly" (in
2618     sub ms intervals) throughout marking. It is this clock method that
2619     checks all the abort conditions which were mentioned above and
2620     decides when the task should abort. A work-based scheme is used to
2621     trigger this clock method: when the number of object words the
2622     marking phase has scanned or the number of references the marking
2623     phase has visited reach a given limit. Additional invocations to
2624     the clock method have been planted in a few other strategic places
2625     too. The initial reason for the clock method was to avoid calling
2626     vtime too regularly, as it is quite expensive. So, once it was in
2627     place, it was natural to piggy-back all the other conditions on it
2628     too and not constantly check them throughout the code.
2629 
2630     If do_termination is true then do_marking_step will enter its
2631     termination protocol.
2632 
2633     The value of is_serial must be true when do_marking_step is being
2634     called serially (i.e. by the VMThread) and do_marking_step should
2635     skip any synchronization in the termination and overflow code.
2636     Examples include the serial remark code and the serial reference
2637     processing closures.
2638 
2639     The value of is_serial must be false when do_marking_step is
2640     being called by any of the worker threads in a work gang.
2641     Examples include the concurrent marking code (CMMarkingTask),
2642     the MT remark code, and the MT reference processing closures.
2643 
2644  *****************************************************************************/
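
// In caller terms, the contract above boils down to: invoke do_marking_step()
// repeatedly until it returns without having aborted, and treat a global
// overflow as a reason to stop retrying at this level. A sketch of that
// caller-side loop (step(), aborted() and overflown() are placeholders for the
// real task and G1ConcurrentMark queries):
//
//   bool aborted();     // placeholder for task->has_aborted()
//   bool overflown();   // placeholder for cm->has_overflown()
//   void step(double time_target_ms, bool do_termination, bool is_serial);
//
//   void run_marking(double target_ms, bool is_serial) {
//     do {
//       step(target_ms, true /* do_termination */, is_serial);
//     } while (aborted() && !overflown());
//     // Returning without aborting means this task's marking phase is done;
//     // an overflow instead means marking has to be restarted at a higher level.
//   }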
2645 
2646 void G1CMTask::do_marking_step(double time_target_ms,
2647                                bool do_termination,
2648                                bool is_serial) {
2649   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2650   assert(concurrent() == _cm->concurrent(), "they should be the same");
2651 
2652   G1Policy* g1_policy = _g1h->g1_policy();
2653   assert(_task_queues != NULL, "invariant");
2654   assert(_task_queue != NULL, "invariant");
2655   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
2656 
2657   assert(!_claimed,
2658          "only one thread should claim this task at any one time");
2659 
2660   // OK, this doesn't safeguard against all possible scenarios, as it is
2661   // possible for two threads to set the _claimed flag at the same
2662   // time. But it is only for debugging purposes anyway and it will
2663   // catch most problems.
2664   _claimed = true;
2665 
2666   _start_time_ms = os::elapsedVTime() * 1000.0;
2667 
2668   // If do_stealing is true then do_marking_step will attempt to
2669   // steal work from the other G1CMTasks. It only makes sense to
2670   // enable stealing when the termination protocol is enabled
2671   // and do_marking_step() is not being called serially.
2672   bool do_stealing = do_termination && !is_serial;
2673 
2674   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2675   _time_target_ms = time_target_ms - diff_prediction_ms;
2676 
2677   // set up the variables that are used in the work-based scheme to
2678   // call the regular clock method
2679   _words_scanned = 0;
2680   _refs_reached  = 0;
2681   recalculate_limits();
2682 
2683   // clear all flags
2684   clear_has_aborted();
2685   _has_timed_out = false;
2686   _draining_satb_buffers = false;
2687 
2688   ++_calls;
2689 
2690   // Set up the bitmap and oop closures. Anything that uses them is
2691   // eventually called from this method, so it is OK to allocate these
2692   // statically.
2693   G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
2694   G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
2695   set_cm_oop_closure(&cm_oop_closure);
2696 
2697   if (_cm->has_overflown()) {
2698     // This can happen if the mark stack overflows during a GC pause
2699     // and this task, after a yield point, restarts. We have to abort
2700     // as we need to get into the overflow protocol which happens
2701     // right at the end of this task.
2702     set_has_aborted();
2703   }
2704 
2705   // First drain any available SATB buffers. After this, we will not
2706   // look at SATB buffers before the next invocation of this method.
2707   // If enough completed SATB buffers are queued up, the regular clock
2708   // will abort this task so that it restarts.
2709   drain_satb_buffers();
2710   // ...then partially drain the local queue and the global stack
2711   drain_local_queue(true);
2712   drain_global_stack(true);
2713 
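       // Main loop: finish scanning the region we are currently holding (if
       // any), partially drain the queues, and then keep claiming and scanning
       // new regions until we either run out of regions or abort (for example
       // because the regular clock decided our time budget is exhausted).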
2714   do {
2715     if (!has_aborted() && _curr_region != NULL) {
2716       // This means that we're already holding on to a region.
2717       assert(_finger != NULL, "if region is not NULL, then the finger "
2718              "should not be NULL either");
2719 
2720       // We might have restarted this task after an evacuation pause
2721       // which might have evacuated the region we're holding on to
2722       // underneath our feet. Let's read its limit again to make sure
2723       // that we do not iterate over a region of the heap that
2724       // contains garbage (update_region_limit() will also move
2725       // _finger to the start of the region if it is found empty).
2726       update_region_limit();
2727       // We will start from _finger not from the start of the region,
2728       // as we might be restarting this task after aborting half-way
2729       // through scanning this region. In this case, _finger points to
2730       // the address where we last found a marked object. If this is a
2731       // fresh region, _finger points to start().
2732       MemRegion mr = MemRegion(_finger, _region_limit);
2733 
2734       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2735              "humongous regions should go around loop once only");
2736 
2737       // Some special cases:
2738       // If the memory region is empty, we can just give up the region.
2739       // If the current region is humongous then we only need to check
2740       // the bitmap for the bit associated with the start of the object,
2741       // scan the object if it's live, and give up the region.
2742       // Otherwise, let's iterate over the bitmap of the part of the region
2743       // that is left.
2744       // If the iteration is successful, give up the region.
2745       if (mr.is_empty()) {
2746         giveup_current_region();
2747         regular_clock_call();
2748       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2749         if (_nextMarkBitMap->isMarked(mr.start())) {
2750           // The object is marked - apply the closure
2751           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
2752           bitmap_closure.do_bit(offset);
2753         }
2754         // Even if this task aborted while scanning the humongous object
2755         // we can (and should) give up the current region.
2756         giveup_current_region();
2757         regular_clock_call();
2758       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
2759         giveup_current_region();
2760         regular_clock_call();
2761       } else {
2762         assert(has_aborted(), "currently the only way to do so");
2763         // The only way to abort the bitmap iteration is to return
2764         // false from the do_bit() method. However, inside the
2765         // do_bit() method we move the _finger to point to the
2766         // object currently being looked at. So, if we bail out, we
2767         // have definitely set _finger to something non-null.
2768         assert(_finger != NULL, "invariant");
2769 
2770         // Region iteration was actually aborted. So now _finger
2771         // points to the address of the object we last scanned. If we
2772         // leave it there, when we restart this task, we will rescan
2773         // the object. It is easy to avoid this. We move the finger by
2774         // enough to point to the next possible object header (the
2775         // bitmap knows by how much we need to move it as it knows its
2776         // granularity).
2777         assert(_finger < _region_limit, "invariant");
2778         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
2779         // Check if bitmap iteration was aborted while scanning the last object
2780         if (new_finger >= _region_limit) {
2781           giveup_current_region();
2782         } else {
2783           move_finger_to(new_finger);
2784         }
2785       }
2786     }
2787     // At this point we have either completed iterating over the
2788     // region we were holding on to, or we have aborted.
2789 
2790     // We then partially drain the local queue and the global stack.
2791     // (Do we really need this?)
2792     drain_local_queue(true);
2793     drain_global_stack(true);
2794 
2795     // Read the note on the claim_region() method on why it might
2796     // return NULL with potentially more regions available for
2797     // claiming and why we have to check out_of_regions() to determine
2798     // whether we're done or not.
2799     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2800       // We are going to try to claim a new region. We should have
2801       // given up on the previous one.
2802       // Separated the asserts so that we know which one fires.
2803       assert(_curr_region  == NULL, "invariant");
2804       assert(_finger       == NULL, "invariant");
2805       assert(_region_limit == NULL, "invariant");
2806       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2807       if (claimed_region != NULL) {
2808         // Yes, we managed to claim one
2809         setup_for_region(claimed_region);
2810         assert(_curr_region == claimed_region, "invariant");
2811       }
2812       // It is important to call the regular clock here. It might take
2813       // a while to claim a region if, for example, we hit a large
2814       // block of empty regions. So we need to call the regular clock
2815       // method once round the loop to make sure it's called
2816       // frequently enough.
2817       regular_clock_call();
2818     }
2819 
2820     if (!has_aborted() && _curr_region == NULL) {
2821       assert(_cm->out_of_regions(),
2822              "at this point we should be out of regions");
2823     }
2824   } while (_curr_region != NULL && !has_aborted());
2825 
2826   if (!has_aborted()) {
2827     // We cannot check whether the global stack is empty, since other
2828     // tasks might be pushing objects to it concurrently.
2829     assert(_cm->out_of_regions(),
2830            "at this point we should be out of regions");
2831     // Try to reduce the number of available SATB buffers so that
2832     // remark has less work to do.
2833     drain_satb_buffers();
2834   }
2835 
2836   // Since we've done everything else, we can now totally drain the
2837   // local queue and global stack.
2838   drain_local_queue(false);
2839   drain_global_stack(false);
2840 
2841   // Attempt to steal work from other tasks' queues.
2842   if (do_stealing && !has_aborted()) {
2843     // We have not aborted. This means that we have finished all that
2844     // we could. Let's try to do some stealing...
2845 
2846     // We cannot check whether the global stack is empty, since other
2847     // tasks might be pushing objects to it concurrently.
2848     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2849            "only way to reach here");
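         // Repeatedly try to steal a single entry from another task's queue;
         // after each successful steal, process the entry and fully drain the
         // local queue and the global stack before trying again.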
2850     while (!has_aborted()) {
2851       G1TaskQueueEntry entry;
2852       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2853         scan_task_entry(entry);
2854 
2855         // And since we're towards the end, let's totally drain the
2856         // local queue and global stack.
2857         drain_local_queue(false);
2858         drain_global_stack(false);
2859       } else {
2860         break;
2861       }
2862     }
2863   }
2864 
2865   // If we still haven't aborted, try to get into the
2866   // termination protocol.
2867   if (do_termination && !has_aborted()) {
2868     // We cannot check whether the global stack is empty, since other
2869     // tasks might be concurrently pushing objects on it.
2870     // Separated the asserts so that we know which one fires.
2871     assert(_cm->out_of_regions(), "only way to reach here");
2872     assert(_task_queue->size() == 0, "only way to reach here");
2873     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2874 
2875     // The G1CMTask class also extends the TerminatorTerminator class,
2876     // hence its should_exit_termination() method will also decide
2877     // whether to exit the termination protocol or not.
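         // offer_termination() is expected to return true only once every task
         // in the gang has offered termination, and to return false early if
         // should_exit_termination() signals that more work has appeared, in
         // which case we abort below and this marking step will be retried.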
2878     bool finished = (is_serial ||
2879                      _cm->terminator()->offer_termination(this));
2880     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2881     _termination_time_ms +=
2882       termination_end_time_ms - _termination_start_time_ms;
2883 
2884     if (finished) {
2885       // We're all done.
2886 
2887       if (_worker_id == 0) {
2888         // let's allow task 0 to do this
2889         if (concurrent()) {
2890           assert(_cm->concurrent_marking_in_progress(), "invariant");
2891           // we need to set this to false before the next
2892           // safepoint. This way we ensure that the marking phase
2893           // doesn't observe any more heap expansions.
2894           _cm->clear_concurrent_marking_in_progress();
2895         }
2896       }
2897 
2898       // We can now guarantee that the global stack is empty, since
2899       // all other tasks have finished. We separated the guarantees so
2900       // that, if a condition is false, we can immediately find out
2901       // which one.
2902       guarantee(_cm->out_of_regions(), "only way to reach here");
2903       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2904       guarantee(_task_queue->size() == 0, "only way to reach here");
2905       guarantee(!_cm->has_overflown(), "only way to reach here");
2906     } else {
2907       // Apparently there's more work to do. Let's abort this task. The
2908       // calling code will restart it and we can hopefully find more to do.
2909       set_has_aborted();
2910     }
2911   }
2912 
2913   // Mainly for debugging purposes to make sure that a pointer to the
2914   // closure which was stack-allocated in this frame doesn't
2915   // escape it by accident.
2916   set_cm_oop_closure(NULL);
2917   double end_time_ms = os::elapsedVTime() * 1000.0;
2918   double elapsed_time_ms = end_time_ms - _start_time_ms;
2919   // Update the step history.
2920   _step_times_ms.add(elapsed_time_ms);
2921 
2922   if (has_aborted()) {
2923     // The task was aborted for some reason.
2924     if (_has_timed_out) {
2925       double diff_ms = elapsed_time_ms - _time_target_ms;
2926       // Keep statistics of how well we did with respect to hitting
2927       // our target only if we actually timed out (if we aborted for
2928       // other reasons, then the results might get skewed).
2929       _marking_step_diffs_ms.add(diff_ms);
2930     }
2931 
2932     if (_cm->has_overflown()) {
2933       // This is the interesting one. We aborted because a global
2934       // overflow was raised. This means we have to restart the
2935       // marking phase and start iterating over regions. However, in
2936       // order to do this we have to make sure that all tasks stop
2937       // what they are doing and re-initialize in a safe manner. We
2938       // will achieve this with the use of two barrier sync points.
2939 
2940       if (!is_serial) {
2941         // We only need to enter the sync barrier if being called
2942         // from a parallel context
2943         _cm->enter_first_sync_barrier(_worker_id);
2944 
2945         // When we exit this sync barrier we know that all tasks have
2946         // stopped doing marking work. So, it's now safe to
2947         // re-initialize our data structures. At the end of this method,
2948         // task 0 will clear the global data structures.
2949       }
2950 
2951       // We clear the local state of this task...
2952       clear_region_fields();
2953 
2954       if (!is_serial) {
2955         // ...and enter the second barrier.
2956         _cm->enter_second_sync_barrier(_worker_id);
2957       }
2958       // At this point, if we're in the concurrent phase of
2959       // marking, everything has been re-initialized and we're
2960       // ready to restart.
2961     }
2962   }
2963 
2964   _claimed = false;
2965 }
2966 
2967 G1CMTask::G1CMTask(uint worker_id,
2968                    G1ConcurrentMark* cm,
2969                    G1CMTaskQueue* task_queue,
2970                    G1CMTaskQueueSet* task_queues)
2971   : _g1h(G1CollectedHeap::heap()),
2972     _worker_id(worker_id), _cm(cm),
2973     _objArray_processor(this),
2974     _claimed(false),
2975     _nextMarkBitMap(NULL), _hash_seed(17),
2976     _task_queue(task_queue),
2977     _task_queues(task_queues),
2978     _cm_oop_closure(NULL) {
2979   guarantee(task_queue != NULL, "invariant");
2980   guarantee(task_queues != NULL, "invariant");
2981 
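       // Seed the step-time difference statistics so that the very first
       // prediction in do_marking_step() starts from a small, non-zero value.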
2982   _marking_step_diffs_ms.add(0.5);
2983 }
2984 
2985 // These are formatting macros that are used below to ensure
2986 // consistent formatting. The *_H_* versions are used to format the
2987 // header for a particular value and they should be kept consistent
2988 // with the corresponding macro. Also note that most of the macros add
2989 // the necessary white space (as a prefix) which makes them a bit
2990 // easier to compose.
2991 
2992 // All the output lines are prefixed with this string to be able to
2993 // identify them easily in a large log file.
2994 #define G1PPRL_LINE_PREFIX            "###"
2995 
2996 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2997 #ifdef _LP64
2998 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2999 #else // _LP64
3000 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3001 #endif // _LP64
3002 
3003 // For per-region info
3004 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3005 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3006 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3007 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3008 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3009 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3010 
3011 // For summary info
3012 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3013 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3014 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3015 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
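     // For reference, each per-region trace line produced with the formats
     // above has roughly this shape (illustrative):
     //   ### <type> <address-range>  <used>  <prev-live>  <next-live>  <gc-eff>  <remset>  <code-roots>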
3016 
3017 G1PrintRegionLivenessInfoClosure::
3018 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3019   : _total_used_bytes(0), _total_capacity_bytes(0),
3020     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3021     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3022   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3023   MemRegion g1_reserved = g1h->g1_reserved();
3024   double now = os::elapsedTime();
3025 
3026   // Print the header of the output.
3027   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3028   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3029                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3030                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3031                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3032                           HeapRegion::GrainBytes);
3033   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3034   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3035                           G1PPRL_TYPE_H_FORMAT
3036                           G1PPRL_ADDR_BASE_H_FORMAT
3037                           G1PPRL_BYTE_H_FORMAT
3038                           G1PPRL_BYTE_H_FORMAT
3039                           G1PPRL_BYTE_H_FORMAT
3040                           G1PPRL_DOUBLE_H_FORMAT
3041                           G1PPRL_BYTE_H_FORMAT
3042                           G1PPRL_BYTE_H_FORMAT,
3043                           "type", "address-range",
3044                           "used", "prev-live", "next-live", "gc-eff",
3045                           "remset", "code-roots");
3046   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3047                           G1PPRL_TYPE_H_FORMAT
3048                           G1PPRL_ADDR_BASE_H_FORMAT
3049                           G1PPRL_BYTE_H_FORMAT
3050                           G1PPRL_BYTE_H_FORMAT
3051                           G1PPRL_BYTE_H_FORMAT
3052                           G1PPRL_DOUBLE_H_FORMAT
3053                           G1PPRL_BYTE_H_FORMAT
3054                           G1PPRL_BYTE_H_FORMAT,
3055                           "", "",
3056                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3057                           "(bytes)", "(bytes)");
3058 }
3059 
3060 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3061   const char* type       = r->get_type_str();
3062   HeapWord* bottom       = r->bottom();
3063   HeapWord* end          = r->end();
3064   size_t capacity_bytes  = r->capacity();
3065   size_t used_bytes      = r->used();
3066   size_t prev_live_bytes = r->live_bytes();
3067   size_t next_live_bytes = r->next_live_bytes();
3068   double gc_eff          = r->gc_efficiency();
3069   size_t remset_bytes    = r->rem_set()->mem_size();
3070   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3071 
3072   _total_used_bytes      += used_bytes;
3073   _total_capacity_bytes  += capacity_bytes;
3074   _total_prev_live_bytes += prev_live_bytes;
3075   _total_next_live_bytes += next_live_bytes;
3076   _total_remset_bytes    += remset_bytes;
3077   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3078 
3079   // Print a line for this particular region.
3080   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3081                           G1PPRL_TYPE_FORMAT
3082                           G1PPRL_ADDR_BASE_FORMAT
3083                           G1PPRL_BYTE_FORMAT
3084                           G1PPRL_BYTE_FORMAT
3085                           G1PPRL_BYTE_FORMAT
3086                           G1PPRL_DOUBLE_FORMAT
3087                           G1PPRL_BYTE_FORMAT
3088                           G1PPRL_BYTE_FORMAT,
3089                           type, p2i(bottom), p2i(end),
3090                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3091                           remset_bytes, strong_code_roots_bytes);
3092 
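       // Returning false keeps the heap region iteration going over the
       // remaining regions.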
3093   return false;
3094 }
3095 
3096 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3097   // Add static memory usages to the remembered set sizes.
3098   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3099   // Print the footer of the output.
3100   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3101   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3102                          " SUMMARY"
3103                          G1PPRL_SUM_MB_FORMAT("capacity")
3104                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3105                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3106                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3107                          G1PPRL_SUM_MB_FORMAT("remset")
3108                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3109                          bytes_to_mb(_total_capacity_bytes),
3110                          bytes_to_mb(_total_used_bytes),
3111                          perc(_total_used_bytes, _total_capacity_bytes),
3112                          bytes_to_mb(_total_prev_live_bytes),
3113                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3114                          bytes_to_mb(_total_next_live_bytes),
3115                          perc(_total_next_live_bytes, _total_capacity_bytes),
3116                          bytes_to_mb(_total_remset_bytes),
3117                          bytes_to_mb(_total_strong_code_roots_bytes));
3118 }