1 /*
   2  * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CardLiveData.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1Policy.hpp"
  37 #include "gc/g1/g1StringDedup.hpp"
  38 #include "gc/g1/heapRegion.inline.hpp"
  39 #include "gc/g1/heapRegionRemSet.hpp"
  40 #include "gc/g1/heapRegionSet.inline.hpp"
  41 #include "gc/g1/suspendibleThreadSet.hpp"
  42 #include "gc/shared/gcId.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.inline.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "logging/log.hpp"
  52 #include "memory/allocation.hpp"
  53 #include "memory/resourceArea.hpp"
  54 #include "oops/oop.inline.hpp"
  55 #include "runtime/atomic.hpp"
  56 #include "runtime/handles.inline.hpp"
  57 #include "runtime/java.hpp"
  58 #include "runtime/prefetch.inline.hpp"
  59 #include "services/memTracker.hpp"
  60 #include "utilities/growableArray.hpp"
  61 
  62 // Concurrent marking bit map wrapper
  63 
  64 G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  65   _bm(),
  66   _shifter(shifter) {
  67   _bmStartWord = 0;
  68   _bmWordSize = 0;
  69 }
  70 
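// Return the address of the first marked word in [addr, limit), or limit if
// no word in that range is marked. addr is first rounded up to a possible
// object boundary.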
  71 HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  72                                                  const HeapWord* limit) const {
  73   // First we must round addr *up* to a possible object boundary.
  74   addr = (HeapWord*)align_size_up((intptr_t)addr,
  75                                   HeapWordSize << _shifter);
  76   size_t addrOffset = heapWordToOffset(addr);
  77   assert(limit != NULL, "limit must not be NULL");
  78   size_t limitOffset = heapWordToOffset(limit);
  79   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  80   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  81   assert(nextAddr >= addr, "get_next_one postcondition");
  82   assert(nextAddr == limit || isMarked(nextAddr),
  83          "get_next_one postcondition");
  84   return nextAddr;
  85 }
  86 
  87 #ifndef PRODUCT
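// Debug-only check that this bitmap exactly covers the given heap memory region.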
  88 bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  89   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  90   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  91          "size inconsistency");
  92   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  93          _bmWordSize  == heap_rs.word_size();
  94 }
  95 #endif
  96 
  97 void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  98   _bm.print_on_error(st, prefix);
  99 }
 100 
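// Number of bytes of bitmap needed to cover a heap of the given size, rounded
// up to the space reservation granularity.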
 101 size_t G1CMBitMap::compute_size(size_t heap_size) {
 102   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 103 }
 104 
 105 size_t G1CMBitMap::mark_distance() {
 106   return MinObjAlignmentInBytes * BitsPerByte;
 107 }
 108 
 109 void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 110   _bmStartWord = heap.start();
 111   _bmWordSize = heap.word_size();
 112 
 113   _bm = BitMapView((BitMap::bm_word_t*) storage->reserved().start(), _bmWordSize >> _shifter);
 114 
 115   storage->set_mapping_changed_listener(&_listener);
 116 }
 117 
 118 void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 119   if (zero_filled) {
 120     return;
 121   }
 122   // We need to clear the bitmap on commit, removing any existing information.
 123   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 124   _bm->clear_range(mr);
 125 }
 126 
void G1CMBitMap::clear_range(MemRegion mr) {
  // Clip the range to the part of the heap covered by this bitmap. Note that
  // intersection() returns the clipped region; it does not modify mr in place.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}
 134 
 135 G1CMMarkStack::G1CMMarkStack() :
 136   _reserved_space(),
 137   _base(NULL),
 138   _capacity(0),
 139   _saved_index((size_t)AllBits),
 140   _should_expand(false) {
 141   set_empty();
 142 }
 143 
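// Replace the backing store of the overflow mark stack with a newly reserved
// and committed space that can hold new_capacity oop entries, release the old
// mapping, and reset the stack to empty. Returns false, leaving the previous
// stack in place, if the new space cannot be reserved or committed.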
 144 bool G1CMMarkStack::resize(size_t new_capacity) {
 145   assert(is_empty(), "Only resize when stack is empty.");
 146   assert(new_capacity <= MarkStackSizeMax,
 147          "Trying to resize stack to " SIZE_FORMAT " elements when the maximum is " SIZE_FORMAT, new_capacity, MarkStackSizeMax);
 148 
 149   size_t reservation_size = ReservedSpace::allocation_align_size_up(new_capacity * sizeof(oop));
 150 
 151   ReservedSpace rs(reservation_size);
 152   if (!rs.is_reserved()) {
 153     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " elements and size " SIZE_FORMAT "B.", new_capacity, reservation_size);
 154     return false;
 155   }
 156 
 157   VirtualSpace vs;
 158 
 159   if (!vs.initialize(rs, rs.size())) {
 160     rs.release();
 161     log_warning(gc)("Failed to commit memory for new overflow mark stack of size " SIZE_FORMAT "B.", rs.size());
 162     return false;
 163   }
 164 
 165   assert(vs.committed_size() == rs.size(), "Failed to commit all of the mark stack.");
 166 
 167   // Release old mapping.
 168   _reserved_space.release();
 169 
 170   // Save new mapping for future unmapping.
 171   _reserved_space = rs;
 172 
 173   MemTracker::record_virtual_memory_type((address)_reserved_space.base(), mtGC);
 174 
 175   _base = (oop*) vs.low();
 176   _capacity = new_capacity;
 177   set_empty();
 178   _should_expand = false;
 179 
 180   return true;
 181 }
 182 
 183 bool G1CMMarkStack::allocate(size_t capacity) {
 184   return resize(capacity);
 185 }
 186 
 187 void G1CMMarkStack::expand() {
 188   // Clear expansion flag
 189   _should_expand = false;
 190 
 191   if (_capacity == MarkStackSizeMax) {
 192     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " elements.", _capacity);
 193     return;
 194   }
 195   size_t old_capacity = _capacity;
 196   // Double capacity if possible
 197   size_t new_capacity = MIN2(old_capacity * 2, MarkStackSizeMax);
 198 
 199   if (resize(new_capacity)) {
 200     log_debug(gc)("Expanded marking stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " elements",
 201                   old_capacity, new_capacity);
 202   } else {
 203     log_warning(gc)("Failed to expand marking stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " elements",
 204                     old_capacity, new_capacity);
 205   }
 206 }
 207 
 208 G1CMMarkStack::~G1CMMarkStack() {
 209   if (_base != NULL) {
 210     _base = NULL;
 211     _reserved_space.release();
 212   }
 213 }
 214 
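// Push n entries from buffer onto the global stack, under ParGCRareEvent_lock.
// If there is not enough room, nothing is pushed and the overflow flag is set.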
 215 void G1CMMarkStack::par_push_arr(oop* buffer, size_t n) {
 216   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 217   size_t start = _index;
 218   size_t next_index = start + n;
 219   if (next_index > _capacity) {
 220     _overflow = true;
 221     return;
 222   }
 223   // Otherwise.
 224   _index = next_index;
 225   for (size_t i = 0; i < n; i++) {
 226     size_t ind = start + i;
 227     assert(ind < _capacity, "By overflow test above.");
 228     _base[ind] = buffer[i];
 229   }
 230 }
 231 
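// Pop up to max entries from the global stack into buffer, under
// ParGCRareEvent_lock. Returns true with *n set to the number of entries
// popped, or false with *n == 0 if the stack is empty.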
 232 bool G1CMMarkStack::par_pop_arr(oop* buffer, size_t max, size_t* n) {
 233   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 234   size_t index = _index;
 235   if (index == 0) {
 236     *n = 0;
 237     return false;
 238   } else {
 239     size_t k = MIN2(max, index);
 240     size_t new_ind = index - k;
 241     for (size_t j = 0; j < k; j++) {
 242       buffer[j] = _base[new_ind + j];
 243     }
 244     _index = new_ind;
 245     *n = k;
 246     return true;
 247   }
 248 }
 249 
 250 void G1CMMarkStack::note_start_of_gc() {
 251   assert(_saved_index == (size_t)AllBits, "note_start_of_gc()/end_of_gc() calls bracketed incorrectly");
 252   _saved_index = _index;
 253 }
 254 
 255 void G1CMMarkStack::note_end_of_gc() {
 256   guarantee(!stack_modified(), "Saved index " SIZE_FORMAT " must be the same as " SIZE_FORMAT, _saved_index, _index);
 257 
 258   _saved_index = (size_t)AllBits;
 259 }
 260 
 261 G1CMRootRegions::G1CMRootRegions() :
 262   _cm(NULL), _scan_in_progress(false),
 263   _should_abort(false), _claimed_survivor_index(0) { }
 264 
 265 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 266   _survivors = survivors;
 267   _cm = cm;
 268 }
 269 
 270 void G1CMRootRegions::prepare_for_scan() {
 271   assert(!scan_in_progress(), "pre-condition");
 272 
 273   // Currently, only survivors can be root regions.
 274   _claimed_survivor_index = 0;
 275   _scan_in_progress = _survivors->regions()->is_nonempty();
 276   _should_abort = false;
 277 }
 278 
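// Claim the next unscanned root region, or return NULL if all regions have
// been claimed or the scan should be aborted.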
 279 HeapRegion* G1CMRootRegions::claim_next() {
 280   if (_should_abort) {
 281     // If someone has set the should_abort flag, we return NULL to
 282     // force the caller to bail out of their loop.
 283     return NULL;
 284   }
 285 
 286   // Currently, only survivors can be root regions.
 287   const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
 288 
 289   int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
 290   if (claimed_index < survivor_regions->length()) {
 291     return survivor_regions->at(claimed_index);
 292   }
 293   return NULL;
 294 }
 295 
 296 uint G1CMRootRegions::num_root_regions() const {
 297   return (uint)_survivors->regions()->length();
 298 }
 299 
 300 void G1CMRootRegions::notify_scan_done() {
 301   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 302   _scan_in_progress = false;
 303   RootRegionScan_lock->notify_all();
 304 }
 305 
 306 void G1CMRootRegions::cancel_scan() {
 307   notify_scan_done();
 308 }
 309 
 310 void G1CMRootRegions::scan_finished() {
 311   assert(scan_in_progress(), "pre-condition");
 312 
 313   // Currently, only survivors can be root regions.
 314   if (!_should_abort) {
 315     assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
 316     assert((uint)_claimed_survivor_index >= _survivors->length(),
 317            "we should have claimed all survivors, claimed index = %u, length = %u",
 318            (uint)_claimed_survivor_index, _survivors->length());
 319   }
 320 
 321   notify_scan_done();
 322 }
 323 
 324 bool G1CMRootRegions::wait_until_scan_finished() {
 325   if (!scan_in_progress()) return false;
 326 
 327   {
 328     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 329     while (scan_in_progress()) {
 330       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 331     }
 332   }
 333   return true;
 334 }
 335 
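// Derive a default number of concurrent marking threads from the number of
// parallel GC threads: roughly one quarter, but at least one.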
 336 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 337   return MAX2((n_par_threads + 2) / 4, 1U);
 338 }
 339 
 340 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 341   _g1h(g1h),
 342   _markBitMap1(),
 343   _markBitMap2(),
 344   _parallel_marking_threads(0),
 345   _max_parallel_marking_threads(0),
 346   _sleep_factor(0.0),
 347   _marking_task_overhead(1.0),
 348   _cleanup_list("Cleanup List"),
 349 
 350   _prevMarkBitMap(&_markBitMap1),
 351   _nextMarkBitMap(&_markBitMap2),
 352 
 353   _global_mark_stack(),
 354   // _finger set in set_non_marking_state
 355 
 356   _max_worker_id(ParallelGCThreads),
 357   // _active_tasks set in set_non_marking_state
 358   // _tasks set inside the constructor
 359   _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
 360   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 361 
 362   _has_overflown(false),
 363   _concurrent(false),
 364   _has_aborted(false),
 365   _restart_for_overflow(false),
 366   _concurrent_marking_in_progress(false),
 367   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 368   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 369 
 370   // _verbose_level set below
 371 
 372   _init_times(),
 373   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 374   _cleanup_times(),
 375   _total_counting_time(0.0),
 376   _total_rs_scrub_time(0.0),
 377 
 378   _parallel_workers(NULL),
 379 
 380   _completed_initialization(false) {
 381 
 382   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 383   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 384 
 385   // Create & start a ConcurrentMark thread.
 386   _cmThread = new ConcurrentMarkThread(this);
 387   assert(cmThread() != NULL, "CM Thread should have been created");
 388   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }
 392 
 393   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 394   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 395   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 396 
 397   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 398   satb_qs.set_buffer_size(G1SATBBufferSize);
 399 
 400   _root_regions.init(_g1h->survivor(), this);
 401 
 402   if (ConcGCThreads > ParallelGCThreads) {
 403     log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
 404                     ConcGCThreads, ParallelGCThreads);
 405     return;
 406   }
 407   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 408     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 409     // if both are set
 410     _sleep_factor             = 0.0;
 411     _marking_task_overhead    = 1.0;
 412   } else if (G1MarkingOverheadPercent > 0) {
 413     // We will calculate the number of parallel marking threads based
 414     // on a target overhead with respect to the soft real-time goal
 415     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 416     double overall_cm_overhead =
 417       (double) MaxGCPauseMillis * marking_overhead /
 418       (double) GCPauseIntervalMillis;
 419     double cpu_ratio = 1.0 / os::initial_active_processor_count();
 420     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 421     double marking_task_overhead =
 422       overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
 423     double sleep_factor =
 424                        (1.0 - marking_task_overhead) / marking_task_overhead;
 425 
 426     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 427     _sleep_factor             = sleep_factor;
 428     _marking_task_overhead    = marking_task_overhead;
 429   } else {
 430     // Calculate the number of parallel marking threads by scaling
 431     // the number of parallel GC threads.
 432     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 433     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 434     _sleep_factor             = 0.0;
 435     _marking_task_overhead    = 1.0;
 436   }
 437 
 438   assert(ConcGCThreads > 0, "Should have been set");
 439   log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
 440   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 441   _parallel_marking_threads = ConcGCThreads;
 442   _max_parallel_marking_threads = _parallel_marking_threads;
 443 
 444   _parallel_workers = new WorkGang("G1 Marker",
 445        _max_parallel_marking_threads, false, true);
 446   if (_parallel_workers == NULL) {
 447     vm_exit_during_initialization("Failed necessary allocation.");
 448   } else {
 449     _parallel_workers->initialize_workers();
 450   }
 451 
 452   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 453     size_t mark_stack_size =
 454       MIN2(MarkStackSizeMax,
 455           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 456     // Verify that the calculated value for MarkStackSize is in range.
 457     // It would be nice to use the private utility routine from Arguments.
 458     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 459       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 460                       "must be between 1 and " SIZE_FORMAT,
 461                       mark_stack_size, MarkStackSizeMax);
 462       return;
 463     }
 464     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 465   } else {
 466     // Verify MarkStackSize is in range.
 467     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 468       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 469         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 470           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 471                           "must be between 1 and " SIZE_FORMAT,
 472                           MarkStackSize, MarkStackSizeMax);
 473           return;
 474         }
 475       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 476         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 477           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 478                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 479                           MarkStackSize, MarkStackSizeMax);
 480           return;
 481         }
 482       }
 483     }
 484   }
 485 
 486   if (!_global_mark_stack.allocate(MarkStackSize)) {
 487     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 488     return;
 489   }
 490 
 491   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
 492   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 493 
 494   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 495   _active_tasks = _max_worker_id;
 496 
 497   for (uint i = 0; i < _max_worker_id; ++i) {
 498     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 499     task_queue->initialize();
 500     _task_queues->register_queue(i, task_queue);
 501 
 502     _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);
 503 
 504     _accum_task_vtime[i] = 0.0;
 505   }
 506 
 507   // so that the call below can read a sensible value
 508   _heap_start = g1h->reserved_region().start();
 509   set_non_marking_state();
 510   _completed_initialization = true;
 511 }
 512 
 513 void G1ConcurrentMark::reset() {
 514   // Starting values for these two. This should be called in a STW
 515   // phase.
 516   MemRegion reserved = _g1h->g1_reserved();
 517   _heap_start = reserved.start();
 518   _heap_end   = reserved.end();
 519 
 520   // Separated the asserts so that we know which one fires.
 521   assert(_heap_start != NULL, "heap bounds should look ok");
 522   assert(_heap_end != NULL, "heap bounds should look ok");
 523   assert(_heap_start < _heap_end, "heap bounds should look ok");
 524 
 525   // Reset all the marking data structures and any necessary flags
 526   reset_marking_state();
 527 
  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
 531   for (uint i = 0; i < _max_worker_id; ++i) {
 532     _tasks[i]->reset(_nextMarkBitMap);
 533   }
 534 
  // We need this to make sure that the flag is on during the evacuation
  // pause with the initial mark piggy-backed on it.
 537   set_concurrent_marking_in_progress();
 538 }
 539 
 540 
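// Reset the global marking state: empty the global mark stack and all task
// queues and move the global finger back to the start of the heap. The
// overflow flag is only cleared if the caller asks for it.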
 541 void G1ConcurrentMark::reset_marking_state(bool clear_overflow) {
 542   _global_mark_stack.set_should_expand(has_overflown());
 543   _global_mark_stack.set_empty();        // Also clears the overflow stack's overflow flag
 544   if (clear_overflow) {
 545     clear_has_overflown();
 546   } else {
 547     assert(has_overflown(), "pre-condition");
 548   }
 549   _finger = _heap_start;
 550 
 551   for (uint i = 0; i < _max_worker_id; ++i) {
 552     G1CMTaskQueue* queue = _task_queues->queue(i);
 553     queue->set_empty();
 554   }
 555 }
 556 
 557 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 558   assert(active_tasks <= _max_worker_id, "we should not have more");
 559 
 560   _active_tasks = active_tasks;
 561   // Need to update the three data structures below according to the
 562   // number of active threads for this phase.
 563   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 564   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 565   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 566 }
 567 
 568 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 569   set_concurrency(active_tasks);
 570 
 571   _concurrent = concurrent;
 572   // We propagate this to all tasks, not just the active ones.
 573   for (uint i = 0; i < _max_worker_id; ++i)
 574     _tasks[i]->set_concurrent(concurrent);
 575 
 576   if (concurrent) {
 577     set_concurrent_marking_in_progress();
 578   } else {
 579     // We currently assume that the concurrent flag has been set to
 580     // false before we start remark. At this point we should also be
 581     // in a STW phase.
 582     assert(!concurrent_marking_in_progress(), "invariant");
 583     assert(out_of_regions(),
 584            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 585            p2i(_finger), p2i(_heap_end));
 586   }
 587 }
 588 
 589 void G1ConcurrentMark::set_non_marking_state() {
 590   // We set the global marking state to some default values when we're
 591   // not doing marking.
 592   reset_marking_state();
 593   _active_tasks = 0;
 594   clear_concurrent_marking_in_progress();
 595 }
 596 
 597 G1ConcurrentMark::~G1ConcurrentMark() {
 598   // The G1ConcurrentMark instance is never freed.
 599   ShouldNotReachHere();
 600 }
 601 
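// Parallel task that clears a mark bitmap. Each region's part of the bitmap is
// cleared in fixed-size chunks so that, when run concurrently, the workers can
// yield to the suspendible thread set between chunks.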
 602 class G1ClearBitMapTask : public AbstractGangTask {
 603 public:
 604   static size_t chunk_size() { return M; }
 605 
 606 private:
 607   // Heap region closure used for clearing the given mark bitmap.
 608   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 609   private:
 610     G1CMBitMap* _bitmap;
 611     G1ConcurrentMark* _cm;
 612   public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
    }
 615 
 616     virtual bool doHeapRegion(HeapRegion* r) {
 617       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 618 
 619       HeapWord* cur = r->bottom();
 620       HeapWord* const end = r->end();
 621 
 622       while (cur < end) {
 623         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 624         _bitmap->clear_range(mr);
 625 
 626         cur += chunk_size_in_words;
 627 
 628         // Abort iteration if after yielding the marking has been aborted.
 629         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 630           return true;
 631         }
 632         // Repeat the asserts from before the start of the closure. We will do them
 633         // as asserts here to minimize their overhead on the product. However, we
 634         // will have them as guarantees at the beginning / end of the bitmap
 635         // clearing to get some checking in the product.
 636         assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
 637         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 638       }
 639       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 640 
 641       return false;
 642     }
 643   };
 644 
 645   G1ClearBitmapHRClosure _cl;
 646   HeapRegionClaimer _hr_claimer;
 647   bool _suspendible; // If the task is suspendible, workers must join the STS.
 648 
 649 public:
 650   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 651     AbstractGangTask("G1 Clear Bitmap"),
 652     _cl(bitmap, suspendible ? cm : NULL),
 653     _hr_claimer(n_workers),
 654     _suspendible(suspendible)
 655   { }
 656 
 657   void work(uint worker_id) {
 658     SuspendibleThreadSetJoiner sts_join(_suspendible);
 659     G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer, true);
 660   }
 661 
 662   bool is_complete() {
 663     return _cl.complete();
 664   }
 665 };
 666 
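// Clear the given bitmap in parallel using the given work gang. If may_yield
// is true the workers join the suspendible thread set and may yield to
// safepoints between chunks; otherwise the clearing must run at a safepoint.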
 667 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 668   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 669 
 670   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 671   size_t const num_chunks = align_size_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 672 
 673   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 674 
 675   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 676 
 677   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 678   workers->run_task(&cl, num_workers);
 679   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 680 }
 681 
 682 void G1ConcurrentMark::cleanup_for_next_mark() {
 683   // Make sure that the concurrent mark thread looks to still be in
 684   // the current cycle.
 685   guarantee(cmThread()->during_cycle(), "invariant");
 686 
 687   // We are finishing up the current cycle by clearing the next
 688   // marking bitmap and getting it ready for the next cycle. During
 689   // this time no other cycle can start. So, let's make sure that this
 690   // is the case.
 691   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 692 
 693   clear_bitmap(_nextMarkBitMap, _parallel_workers, true);
 694 
 695   // Clear the live count data. If the marking has been aborted, the abort()
 696   // call already did that.
 697   if (!has_aborted()) {
 698     clear_live_data(_parallel_workers);
 699     DEBUG_ONLY(verify_live_data_clear());
 700   }
 701 
 702   // Repeat the asserts from above.
 703   guarantee(cmThread()->during_cycle(), "invariant");
 704   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 705 }
 706 
 707 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 708   assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
 709   clear_bitmap((G1CMBitMap*)_prevMarkBitMap, workers, false);
 710 }
 711 
 712 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 713   G1CMBitMap* _bitmap;
 714   bool _error;
 715  public:
 716   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 717   }
 718 
 719   virtual bool doHeapRegion(HeapRegion* r) {
 720     // This closure can be called concurrently to the mutator, so we must make sure
 721     // that the result of the getNextMarkedWordAddress() call is compared to the
 722     // value passed to it as limit to detect any found bits.
 723     // end never changes in G1.
 724     HeapWord* end = r->end();
 725     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 726   }
 727 };
 728 
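// Returns true iff no bits are set in the next marking bitmap for any region.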
 729 bool G1ConcurrentMark::nextMarkBitmapIsClear() {
 730   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 731   _g1h->heap_region_iterate(&cl);
 732   return cl.complete();
 733 }
 734 
 735 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 736 public:
 737   bool doHeapRegion(HeapRegion* r) {
 738     r->note_start_of_marking();
 739     return false;
 740   }
 741 };
 742 
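// First part of the initial-mark pause (STW): reset the marking data
// structures and note the start of marking in every heap region.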
 743 void G1ConcurrentMark::checkpointRootsInitialPre() {
 744   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 745   G1Policy* g1p = g1h->g1_policy();
 746 
 747   _has_aborted = false;
 748 
 749   // Initialize marking structures. This has to be done in a STW phase.
 750   reset();
 751 
 752   // For each region note start of marking.
 753   NoteStartOfMarkHRClosure startcl;
 754   g1h->heap_region_iterate(&startcl);
 755 }
 756 
 757 
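// Second part of the initial-mark pause (STW): enable concurrent reference
// discovery, activate the SATB queues of all threads and prepare the root
// regions for scanning.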
 758 void G1ConcurrentMark::checkpointRootsInitialPost() {
 759   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 760 
 761   // Start Concurrent Marking weak-reference discovery.
 762   ReferenceProcessor* rp = g1h->ref_processor_cm();
 763   // enable ("weak") refs discovery
 764   rp->enable_discovery();
 765   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 766 
 767   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 770   satb_mq_set.set_active_all_threads(true, /* new active value */
 771                                      false /* expected_active */);
 772 
 773   _root_regions.prepare_for_scan();
 774 
 775   // update_g1_committed() will be called at the end of an evac pause
 776   // when marking is on. So, it's also called at the end of the
 777   // initial-mark pause to update the heap end, if the heap expands
 778   // during it. No need to call it here.
 779 }
 780 
 781 /*
 782  * Notice that in the next two methods, we actually leave the STS
 783  * during the barrier sync and join it immediately afterwards. If we
 784  * do not do this, the following deadlock can occur: one thread could
 785  * be in the barrier sync code, waiting for the other thread to also
 786  * sync up, whereas another one could be trying to yield, while also
 787  * waiting for the other threads to sync up too.
 788  *
 789  * Note, however, that this code is also used during remark and in
 790  * this case we should not attempt to leave / enter the STS, otherwise
 791  * we'll either hit an assert (debug / fastdebug) or deadlock
 792  * (product). So we should only leave / enter the STS if we are
 793  * operating concurrently.
 794  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 800  */
 801 
 802 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 803   bool barrier_aborted;
 804   {
 805     SuspendibleThreadSetLeaver sts_leave(concurrent());
 806     barrier_aborted = !_first_overflow_barrier_sync.enter();
 807   }
 808 
 809   // at this point everyone should have synced up and not be doing any
 810   // more work
 811 
 812   if (barrier_aborted) {
 813     // If the barrier aborted we ignore the overflow condition and
 814     // just abort the whole marking phase as quickly as possible.
 815     return;
 816   }
 817 
 818   // If we're executing the concurrent phase of marking, reset the marking
 819   // state; otherwise the marking state is reset after reference processing,
 820   // during the remark pause.
 821   // If we reset here as a result of an overflow during the remark we will
 822   // see assertion failures from any subsequent set_concurrency_and_phase()
 823   // calls.
 824   if (concurrent()) {
    // let the task associated with worker 0 do this
 826     if (worker_id == 0) {
 827       // task 0 is responsible for clearing the global data structures
 828       // We should be here because of an overflow. During STW we should
 829       // not clear the overflow flag since we rely on it being true when
 830       // we exit this method to abort the pause and restart concurrent
 831       // marking.
 832       reset_marking_state(true /* clear_overflow */);
 833 
 834       log_info(gc, marking)("Concurrent Mark reset for overflow");
 835     }
 836   }
 837 
  // after this, each task should reset its own data structures and
  // then go into the second barrier
 840 }
 841 
 842 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 843   SuspendibleThreadSetLeaver sts_leave(concurrent());
 844   _second_overflow_barrier_sync.enter();
 845 
 846   // at this point everything should be re-initialized and ready to go
 847 }
 848 
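// Gang task run by the concurrent marking threads. Each worker repeatedly
// performs bounded marking steps, yielding to safepoints and sleeping
// according to the sleep factor, until marking completes or is aborted.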
 849 class G1CMConcurrentMarkingTask: public AbstractGangTask {
 850 private:
 851   G1ConcurrentMark*     _cm;
 852   ConcurrentMarkThread* _cmt;
 853 
 854 public:
 855   void work(uint worker_id) {
 856     assert(Thread::current()->is_ConcurrentGC_thread(),
 857            "this should only be done by a conc GC thread");
 858     ResourceMark rm;
 859 
 860     double start_vtime = os::elapsedVTime();
 861 
 862     {
 863       SuspendibleThreadSetJoiner sts_join;
 864 
 865       assert(worker_id < _cm->active_tasks(), "invariant");
 866       G1CMTask* the_task = _cm->task(worker_id);
 867       the_task->record_start_time();
 868       if (!_cm->has_aborted()) {
 869         do {
 870           double start_vtime_sec = os::elapsedVTime();
 871           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 872 
 873           the_task->do_marking_step(mark_step_duration_ms,
 874                                     true  /* do_termination */,
 875                                     false /* is_serial*/);
 876 
 877           double end_vtime_sec = os::elapsedVTime();
 878           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 879           _cm->clear_has_overflown();
 880 
 881           _cm->do_yield_check();
 882 
 883           jlong sleep_time_ms;
 884           if (!_cm->has_aborted() && the_task->has_aborted()) {
 885             sleep_time_ms =
 886               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 887             {
 888               SuspendibleThreadSetLeaver sts_leave;
 889               os::sleep(Thread::current(), sleep_time_ms, false);
 890             }
 891           }
 892         } while (!_cm->has_aborted() && the_task->has_aborted());
 893       }
 894       the_task->record_end_time();
 895       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 896     }
 897 
 898     double end_vtime = os::elapsedVTime();
 899     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 900   }
 901 
 902   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
 903                             ConcurrentMarkThread* cmt) :
 904       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 905 
 906   ~G1CMConcurrentMarkingTask() { }
 907 };
 908 
 909 // Calculates the number of active workers for a concurrent
 910 // phase.
 911 uint G1ConcurrentMark::calc_parallel_marking_threads() {
 912   uint n_conc_workers = 0;
 913   if (!UseDynamicNumberOfGCThreads ||
 914       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 915        !ForceDynamicNumberOfGCThreads)) {
 916     n_conc_workers = max_parallel_marking_threads();
 917   } else {
 918     n_conc_workers =
 919       AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
 920                                                       1, /* Minimum workers */
 921                                                       parallel_marking_threads(),
 922                                                       Threads::number_of_non_daemon_threads());
 923     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 924     // that scaling has already gone into "_max_parallel_marking_threads".
 925   }
 926   assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
 927          "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
 928          max_parallel_marking_threads(), n_conc_workers);
 929   return n_conc_workers;
 930 }
 931 
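// Apply G1RootRegionScanClosure to every object in the given root region,
// marking the objects they reference.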
 932 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
 933   // Currently, only survivors can be root regions.
 934   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 935   G1RootRegionScanClosure cl(_g1h, this);
 936 
 937   const uintx interval = PrefetchScanIntervalInBytes;
 938   HeapWord* curr = hr->bottom();
 939   const HeapWord* end = hr->top();
 940   while (curr < end) {
 941     Prefetch::read(curr, interval);
 942     oop obj = oop(curr);
 943     int size = obj->oop_iterate_size(&cl);
 944     assert(size == obj->size(), "sanity");
 945     curr += size;
 946   }
 947 }
 948 
 949 class G1CMRootRegionScanTask : public AbstractGangTask {
 950 private:
 951   G1ConcurrentMark* _cm;
 952 
 953 public:
 954   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 955     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
 956 
 957   void work(uint worker_id) {
 958     assert(Thread::current()->is_ConcurrentGC_thread(),
 959            "this should only be done by a conc GC thread");
 960 
 961     G1CMRootRegions* root_regions = _cm->root_regions();
 962     HeapRegion* hr = root_regions->claim_next();
 963     while (hr != NULL) {
 964       _cm->scanRootRegion(hr);
 965       hr = root_regions->claim_next();
 966     }
 967   }
 968 };
 969 
 970 void G1ConcurrentMark::scan_root_regions() {
 971   // scan_in_progress() will have been set to true only if there was
 972   // at least one root region to scan. So, if it's false, we
 973   // should not attempt to do any further work.
 974   if (root_regions()->scan_in_progress()) {
 975     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 976 
 977     _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
 978                                      // We distribute work on a per-region basis, so starting
 979                                      // more threads than that is useless.
 980                                      root_regions()->num_root_regions());
 981     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
 982            "Maximum number of marking threads exceeded");
 983 
 984     G1CMRootRegionScanTask task(this);
 985     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
 986                         task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
 987     _parallel_workers->run_task(&task, _parallel_marking_threads);
 988 
 989     // It's possible that has_aborted() is true here without actually
 990     // aborting the survivor scan earlier. This is OK as it's
 991     // mainly used for sanity checking.
 992     root_regions()->scan_finished();
 993   }
 994 }
 995 
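// Report the start of the concurrent marking cycle to the GC timing and
// tracing framework.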
 996 void G1ConcurrentMark::concurrent_cycle_start() {
 997   _gc_timer_cm->register_gc_start();
 998 
 999   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1000 
1001   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1002 }
1003 
1004 void G1ConcurrentMark::concurrent_cycle_end() {
1005   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1006 
1007   if (has_aborted()) {
1008     _gc_tracer_cm->report_concurrent_mode_failure();
1009   }
1010 
1011   _gc_timer_cm->register_gc_end();
1012 
1013   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1014 }
1015 
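// The concurrent phase of marking: determine the number of active marking
// threads and run the concurrent marking task on the parallel workers.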
1016 void G1ConcurrentMark::mark_from_roots() {
1017   // we might be tempted to assert that:
1018   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1019   //        "inconsistent argument?");
1020   // However that wouldn't be right, because it's possible that
1021   // a safepoint is indeed in progress as a younger generation
1022   // stop-the-world GC happens even as we mark in this generation.
1023 
1024   _restart_for_overflow = false;
1025 
  // Calculate the number of marking threads to use for this cycle.
  _parallel_marking_threads = calc_parallel_marking_threads();
1028   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1029     "Maximum number of marking threads exceeded");
1030 
1031   uint active_workers = MAX2(1U, parallel_marking_threads());
1032   assert(active_workers > 0, "Should have been set");
1033 
1034   // Setting active workers is not guaranteed since fewer
1035   // worker threads may currently exist and more may not be
1036   // available.
1037   active_workers = _parallel_workers->update_active_workers(active_workers);
1038   // Parallel task terminator is set in "set_concurrency_and_phase()"
1039   set_concurrency_and_phase(active_workers, true /* concurrent */);
1040 
1041   G1CMConcurrentMarkingTask markingTask(this, cmThread());
1042   _parallel_workers->run_task(&markingTask);
1043   print_stats();
1044 }
1045 
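// The Remark pause (STW): finish marking, process references, and either
// restart concurrent marking if the global mark stack overflowed or complete
// the marking phase.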
1046 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1047   // world is stopped at this checkpoint
1048   assert(SafepointSynchronize::is_at_safepoint(),
1049          "world should be stopped");
1050 
1051   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1052 
1053   // If a full collection has happened, we shouldn't do this.
1054   if (has_aborted()) {
1055     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1056     return;
1057   }
1058 
1059   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1060 
1061   if (VerifyDuringGC) {
1062     HandleMark hm;  // handle scope
1063     g1h->prepare_for_verify();
1064     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1065   }
1066   g1h->verifier()->check_bitmaps("Remark Start");
1067 
1068   G1Policy* g1p = g1h->g1_policy();
1069   g1p->record_concurrent_mark_remark_start();
1070 
1071   double start = os::elapsedTime();
1072 
1073   checkpointRootsFinalWork();
1074 
1075   double mark_work_end = os::elapsedTime();
1076 
1077   weakRefsWork(clear_all_soft_refs);
1078 
1079   if (has_overflown()) {
1080     // We overflowed.  Restart concurrent marking.
1081     _restart_for_overflow = true;
1082 
1083     // Verify the heap w.r.t. the previous marking bitmap.
1084     if (VerifyDuringGC) {
1085       HandleMark hm;  // handle scope
1086       g1h->prepare_for_verify();
1087       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1088     }
1089 
1090     // Clear the marking state because we will be restarting
1091     // marking due to overflowing the global mark stack.
1092     reset_marking_state();
1093   } else {
1094     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1095     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1098     satb_mq_set.set_active_all_threads(false, /* new active value */
1099                                        true /* expected_active */);
1100 
1101     if (VerifyDuringGC) {
1102       HandleMark hm;  // handle scope
1103       g1h->prepare_for_verify();
1104       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1105     }
1106     g1h->verifier()->check_bitmaps("Remark End");
1107     assert(!restart_for_overflow(), "sanity");
1108     // Completely reset the marking state since marking completed
1109     set_non_marking_state();
1110   }
1111 
1112   // Expand the marking stack, if we have to and if we can.
1113   if (_global_mark_stack.should_expand()) {
1114     _global_mark_stack.expand();
1115   }
1116 
1117   // Statistics
1118   double now = os::elapsedTime();
1119   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1120   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1121   _remark_times.add((now - start) * 1000.0);
1122 
1123   g1p->record_concurrent_mark_remark_end();
1124 
1125   G1CMIsAliveClosure is_alive(g1h);
1126   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1127 }
1128 
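// Closure applied to every heap region during the Cleanup pause: records the
// end of marking for the region and frees old and humongous regions that
// contain no live data.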
1129 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1130   G1CollectedHeap* _g1;
1131   size_t _freed_bytes;
1132   FreeRegionList* _local_cleanup_list;
1133   uint _old_regions_removed;
1134   uint _humongous_regions_removed;
1135   HRRSCleanupTask* _hrrs_cleanup_task;
1136 
1137 public:
1138   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1139                              FreeRegionList* local_cleanup_list,
1140                              HRRSCleanupTask* hrrs_cleanup_task) :
1141     _g1(g1),
1142     _freed_bytes(0),
1143     _local_cleanup_list(local_cleanup_list),
1144     _old_regions_removed(0),
1145     _humongous_regions_removed(0),
1146     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1147 
1148   size_t freed_bytes() { return _freed_bytes; }
1149   const uint old_regions_removed() { return _old_regions_removed; }
1150   const uint humongous_regions_removed() { return _humongous_regions_removed; }
1151 
1152   bool doHeapRegion(HeapRegion *hr) {
1153     if (hr->is_archive()) {
1154       return false;
1155     }
1156     _g1->reset_gc_time_stamps(hr);
1157     hr->note_end_of_marking();
1158 
1159     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1160       _freed_bytes += hr->used();
1161       hr->set_containing_set(NULL);
1162       if (hr->is_humongous()) {
1163         _humongous_regions_removed++;
1164         _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
1165       } else {
1166         _old_regions_removed++;
1167         _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
1168       }
1169     } else {
1170       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1171     }
1172 
1173     return false;
1174   }
1175 };
1176 
1177 class G1ParNoteEndTask: public AbstractGangTask {
1178   friend class G1NoteEndOfConcMarkClosure;
1179 
1180 protected:
1181   G1CollectedHeap* _g1h;
1182   FreeRegionList* _cleanup_list;
1183   HeapRegionClaimer _hrclaimer;
1184 
1185 public:
1186   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1187       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1188   }
1189 
1190   void work(uint worker_id) {
1191     FreeRegionList local_cleanup_list("Local Cleanup List");
1192     HRRSCleanupTask hrrs_cleanup_task;
1193     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1194                                            &hrrs_cleanup_task);
1195     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1196     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1197 
1198     // Now update the lists
1199     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1200     {
1201       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1202       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1203 
1204       // If we iterate over the global cleanup list at the end of
1205       // cleanup to do this printing we will not guarantee to only
1206       // generate output for the newly-reclaimed regions (the list
1207       // might not be empty at the beginning of cleanup; we might
1208       // still be working on its previous contents). So we do the
1209       // printing here, before we append the new regions to the global
1210       // cleanup list.
1211 
1212       G1HRPrinter* hr_printer = _g1h->hr_printer();
1213       if (hr_printer->is_active()) {
1214         FreeRegionListIterator iter(&local_cleanup_list);
1215         while (iter.more_available()) {
1216           HeapRegion* hr = iter.get_next();
1217           hr_printer->cleanup(hr);
1218         }
1219       }
1220 
1221       _cleanup_list->add_ordered(&local_cleanup_list);
1222       assert(local_cleanup_list.is_empty(), "post-condition");
1223 
1224       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1225     }
1226   }
1227 };
1228 
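// The Cleanup pause (STW): finalize the liveness data gathered during marking,
// swap the mark bitmaps, free regions with no live objects and update related
// statistics.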
1229 void G1ConcurrentMark::cleanup() {
1230   // world is stopped at this checkpoint
1231   assert(SafepointSynchronize::is_at_safepoint(),
1232          "world should be stopped");
1233   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1234 
1235   // If a full collection has happened, we shouldn't do this.
1236   if (has_aborted()) {
1237     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1238     return;
1239   }
1240 
1241   g1h->verifier()->verify_region_sets_optional();
1242 
1243   if (VerifyDuringGC) {
1244     HandleMark hm;  // handle scope
1245     g1h->prepare_for_verify();
1246     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1247   }
1248   g1h->verifier()->check_bitmaps("Cleanup Start");
1249 
1250   G1Policy* g1p = g1h->g1_policy();
1251   g1p->record_concurrent_mark_cleanup_start();
1252 
1253   double start = os::elapsedTime();
1254 
1255   HeapRegionRemSet::reset_for_cleanup_tasks();
1256 
1257   {
1258     GCTraceTime(Debug, gc)("Finalize Live Data");
1259     finalize_live_data();
1260   }
1261 
1262   if (VerifyDuringGC) {
1263     GCTraceTime(Debug, gc)("Verify Live Data");
1264     verify_live_data();
1265   }
1266 
1267   g1h->collector_state()->set_mark_in_progress(false);
1268 
1269   double count_end = os::elapsedTime();
1270   double this_final_counting_time = (count_end - start);
1271   _total_counting_time += this_final_counting_time;
1272 
1273   if (log_is_enabled(Trace, gc, liveness)) {
1274     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1275     _g1h->heap_region_iterate(&cl);
1276   }
1277 
1278   // Install newly created mark bitMap as "prev".
1279   swapMarkBitMaps();
1280 
1281   g1h->reset_gc_time_stamp();
1282 
1283   uint n_workers = _g1h->workers()->active_workers();
1284 
1285   // Note end of marking in all heap regions.
1286   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1287   g1h->workers()->run_task(&g1_par_note_end_task);
1288   g1h->check_gc_time_stamps();
1289 
1290   if (!cleanup_list_is_empty()) {
1291     // The cleanup list is not empty, so we'll have to process it
1292     // concurrently. Notify anyone else that might be wanting free
1293     // regions that there will be more free regions coming soon.
1294     g1h->set_free_regions_coming();
1295   }
1296 
  // Remembered set scrubbing needs to happen before the
  // record_concurrent_mark_cleanup_end() call below, since it affects the
  // metric by which we sort the heap regions.
1299   if (G1ScrubRemSets) {
1300     double rs_scrub_start = os::elapsedTime();
1301     g1h->scrub_rem_set();
1302     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1303   }
1304 
1305   // this will also free any regions totally full of garbage objects,
1306   // and sort the regions.
1307   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1308 
1309   // Statistics.
1310   double end = os::elapsedTime();
1311   _cleanup_times.add((end - start) * 1000.0);
1312 
1313   // Clean up will have freed any regions completely full of garbage.
1314   // Update the soft reference policy with the new heap occupancy.
1315   Universe::update_heap_info_at_gc();
1316 
1317   if (VerifyDuringGC) {
1318     HandleMark hm;  // handle scope
1319     g1h->prepare_for_verify();
1320     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1321   }
1322 
1323   g1h->verifier()->check_bitmaps("Cleanup End");
1324 
1325   g1h->verifier()->verify_region_sets_optional();
1326 
1327   // We need to make this be a "collection" so any collection pause that
1328   // races with it goes around and waits for completeCleanup to finish.
1329   g1h->increment_total_collections();
1330 
1331   // Clean out dead classes and update Metaspace sizes.
1332   if (ClassUnloadingWithConcurrentMark) {
1333     ClassLoaderDataGraph::purge();
1334   }
1335   MetaspaceGC::compute_new_size();
1336 
1337   // We reclaimed old regions so we should calculate the sizes to make
1338   // sure we update the old gen/space data.
1339   g1h->g1mm()->update_sizes();
1340   g1h->allocation_context_stats().update_after_mark();
1341 }
1342 
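// Concurrently hand the regions freed during the Cleanup pause over to the
// secondary free list, in batches, so that they become available for
// allocation again.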
1343 void G1ConcurrentMark::complete_cleanup() {
1344   if (has_aborted()) return;
1345 
1346   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1347 
1348   _cleanup_list.verify_optional();
1349   FreeRegionList tmp_free_list("Tmp Free List");
1350 
1351   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1352                                   "cleanup list has %u entries",
1353                                   _cleanup_list.length());
1354 
1355   // No one else should be accessing the _cleanup_list at this point,
1356   // so it is not necessary to take any locks
1357   while (!_cleanup_list.is_empty()) {
1358     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1359     assert(hr != NULL, "Got NULL from a non-empty list");
1360     hr->par_clear();
1361     tmp_free_list.add_ordered(hr);
1362 
1363     // Instead of adding one region at a time to the secondary_free_list,
1364     // we accumulate them in the local list and move them a few at a
1365     // time. This also cuts down on the number of notify_all() calls
1366     // we do during this process. We'll also append the local list when
1367     // _cleanup_list is empty (which means we just removed the last
1368     // region from the _cleanup_list).
1369     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1370         _cleanup_list.is_empty()) {
1371       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1372                                       "appending %u entries to the secondary_free_list, "
1373                                       "cleanup list still has %u entries",
1374                                       tmp_free_list.length(),
1375                                       _cleanup_list.length());
1376 
1377       {
1378         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1379         g1h->secondary_free_list_add(&tmp_free_list);
1380         SecondaryFreeList_lock->notify_all();
1381       }
1382 #ifndef PRODUCT
1383       if (G1StressConcRegionFreeing) {
1384         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1385           os::sleep(Thread::current(), (jlong) 1, false);
1386         }
1387       }
1388 #endif
1389     }
1390   }
1391   assert(tmp_free_list.is_empty(), "post-condition");
1392 }
1393 
// Supporting Object and Oop closures for reference discovery
// and processing during marking
1396 
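// An object is considered alive if it lies outside the G1 reserved heap or if
// the marking information does not show it to be dead.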
1397 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1398   HeapWord* addr = (HeapWord*)obj;
1399   return addr != NULL &&
1400          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1401 }
1402 
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1404 // Uses the G1CMTask associated with a worker thread (for serial reference
1405 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1406 // trace referent objects.
1407 //
1408 // Using the G1CMTask and embedded local queues avoids having the worker
1409 // threads operating on the global mark stack. This reduces the risk
1410 // of overflowing the stack - which we would rather avoid at this late
1411 // stage. Also using the tasks' local queues removes the potential
1412 // for the workers to interfere with each other, which could occur if
1413 // they operated on the global stack.
1414 
1415 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1416   G1ConcurrentMark* _cm;
1417   G1CMTask*         _task;
1418   int               _ref_counter_limit;
1419   int               _ref_counter;
1420   bool              _is_serial;
1421  public:
1422   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1423     _cm(cm), _task(task), _is_serial(is_serial),
1424     _ref_counter_limit(G1RefProcDrainInterval) {
1425     assert(_ref_counter_limit > 0, "sanity");
1426     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1427     _ref_counter = _ref_counter_limit;
1428   }
1429 
1430   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1431   virtual void do_oop(      oop* p) { do_oop_work(p); }
1432 
1433   template <class T> void do_oop_work(T* p) {
1434     if (!_cm->has_overflown()) {
1435       oop obj = oopDesc::load_decode_heap_oop(p);
1436       _task->deal_with_reference(obj);
1437       _ref_counter--;
1438 
1439       if (_ref_counter == 0) {
1440         // We have dealt with _ref_counter_limit references, pushing them
1441         // and objects reachable from them on to the local stack (and
1442         // possibly the global stack). Call G1CMTask::do_marking_step() to
1443         // process these entries.
1444         //
1445         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1446         // there's nothing more to do (i.e. we're done with the entries that
1447         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1448         // above) or we overflow.
1449         //
1450         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1451         // flag while there may still be some work to do. (See the comment at
1452         // the beginning of G1CMTask::do_marking_step() for those conditions -
1453         // one of which is reaching the specified time target.) It is only
1454         // when G1CMTask::do_marking_step() returns without setting the
1455         // has_aborted() flag that the marking step has completed.
1456         do {
1457           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1458           _task->do_marking_step(mark_step_duration_ms,
1459                                  false      /* do_termination */,
1460                                  _is_serial);
1461         } while (_task->has_aborted() && !_cm->has_overflown());
1462         _ref_counter = _ref_counter_limit;
1463       }
1464     }
1465   }
1466 };
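// Note: instances of this closure are created in two places below: serially
// (is_serial == true, using task 0) in G1ConcurrentMark::weakRefsWork(), and
// one per worker (is_serial == false) in G1CMRefProcTaskProxy::work().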
1467 
1468 // 'Drain' oop closure used by both serial and parallel reference processing.
1469 // Uses the G1CMTask associated with a given worker thread (for serial
1470 // reference processing the G1CMTask for worker 0 is used). Calls the
1471 // do_marking_step routine, with an unbelievably large timeout value,
1472 // to drain the marking data structures of the remaining entries
1473 // added by the 'keep alive' oop closure above.
1474 
1475 class G1CMDrainMarkingStackClosure: public VoidClosure {
1476   G1ConcurrentMark* _cm;
1477   G1CMTask*         _task;
1478   bool              _is_serial;
1479  public:
1480   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1481     _cm(cm), _task(task), _is_serial(is_serial) {
1482     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1483   }
1484 
1485   void do_void() {
1486     do {
1487       // We call G1CMTask::do_marking_step() to completely drain the local
1488       // and global marking stacks of entries pushed by the 'keep alive'
1489       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1490       //
1491       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1492       // if there's nothing more to do (i.e. we've completely drained the
1493       // entries that were pushed as a result of applying the 'keep alive'
1494       // closure to the entries on the discovered ref lists) or we overflow
1495       // the global marking stack.
1496       //
1497       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1498       // flag while there may still be some work to do. (See the comment at
1499       // the beginning of G1CMTask::do_marking_step() for those conditions -
1500       // one of which is reaching the specified time target.) It is only
1501       // when G1CMTask::do_marking_step() returns without setting the
1502       // has_aborted() flag that the marking step has completed.
1503 
1504       _task->do_marking_step(1000000000.0 /* something very large */,
1505                              true         /* do_termination */,
1506                              _is_serial);
1507     } while (_task->has_aborted() && !_cm->has_overflown());
1508   }
1509 };
1510 
1511 // Implementation of AbstractRefProcTaskExecutor for parallel
1512 // reference processing at the end of G1 concurrent marking
1513 
1514 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1515 private:
1516   G1CollectedHeap*  _g1h;
1517   G1ConcurrentMark* _cm;
1518   WorkGang*         _workers;
1519   uint              _active_workers;
1520 
1521 public:
1522   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1523                           G1ConcurrentMark* cm,
1524                           WorkGang* workers,
1525                           uint n_workers) :
1526     _g1h(g1h), _cm(cm),
1527     _workers(workers), _active_workers(n_workers) { }
1528 
1529   // Executes the given task using concurrent marking worker threads.
1530   virtual void execute(ProcessTask& task);
1531   virtual void execute(EnqueueTask& task);
1532 };
1533 
1534 class G1CMRefProcTaskProxy: public AbstractGangTask {
1535   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1536   ProcessTask&      _proc_task;
1537   G1CollectedHeap*  _g1h;
1538   G1ConcurrentMark* _cm;
1539 
1540 public:
1541   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1542                        G1CollectedHeap* g1h,
1543                        G1ConcurrentMark* cm) :
1544     AbstractGangTask("Process reference objects in parallel"),
1545     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1546     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1547     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1548   }
1549 
1550   virtual void work(uint worker_id) {
1551     ResourceMark rm;
1552     HandleMark hm;
1553     G1CMTask* task = _cm->task(worker_id);
1554     G1CMIsAliveClosure g1_is_alive(_g1h);
1555     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1556     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1557 
1558     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1559   }
1560 };
1561 
1562 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1563   assert(_workers != NULL, "Need parallel worker threads.");
1564   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1565 
1566   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1567 
1568   // We need to reset the concurrency level before each
1569   // proxy task execution, so that the termination protocol
1570   // and overflow handling in G1CMTask::do_marking_step() know
1571   // how many workers to wait for.
1572   _cm->set_concurrency(_active_workers);
1573   _workers->run_task(&proc_task_proxy);
1574 }
1575 
1576 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1577   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1578   EnqueueTask& _enq_task;
1579 
1580 public:
1581   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1582     AbstractGangTask("Enqueue reference objects in parallel"),
1583     _enq_task(enq_task) { }
1584 
1585   virtual void work(uint worker_id) {
1586     _enq_task.work(worker_id);
1587   }
1588 };
1589 
1590 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1591   assert(_workers != NULL, "Need parallel worker threads.");
1592   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1593 
1594   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1595 
1596   // Not strictly necessary but...
1597   //
1598   // We need to reset the concurrency level before each
1599   // proxy task execution, so that the termination protocol
1600   // and overflow handling in G1CMTask::do_marking_step() know
1601   // how many workers to wait for.
1602   _cm->set_concurrency(_active_workers);
1603   _workers->run_task(&enq_task_proxy);
1604 }
1605 
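// Parallel part of the class unloading work: delegates to the heap's parallel
// cleaning (string/symbol tables, code cache and class data), using the given
// is-alive closure to determine liveness.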
1606 void G1ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
1607   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
1608 }
1609 
1610 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1611   if (has_overflown()) {
1612     // Skip processing the discovered references if we have
1613     // overflown the global marking stack. Reference objects
1614     // only get discovered once so it is OK to not
1615     // de-populate the discovered reference lists. We could have done so,
1616     // but the only benefit would be that, when marking restarts,
1617     // fewer reference objects are discovered.
1618     return;
1619   }
1620 
1621   ResourceMark rm;
1622   HandleMark   hm;
1623 
1624   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1625 
1626   // Is alive closure.
1627   G1CMIsAliveClosure g1_is_alive(g1h);
1628 
1629   // Inner scope to exclude the cleaning of the string and symbol
1630   // tables from the displayed time.
1631   {
1632     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1633 
1634     ReferenceProcessor* rp = g1h->ref_processor_cm();
1635 
1636     // See the comment in G1CollectedHeap::ref_processing_init()
1637     // about how reference processing currently works in G1.
1638 
1639     // Set the soft reference policy
1640     rp->setup_policy(clear_all_soft_refs);
1641     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1642 
1643     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1644     // in serial reference processing. Note these closures are also
1645     // used for serially processing (by the current thread) the
1646     // JNI references during parallel reference processing.
1647     //
1648     // These closures do not need to synchronize with the worker
1649     // threads involved in parallel reference processing as these
1650     // instances are executed serially by the current thread (i.e.
1651     // reference processing is not multi-threaded and is thus
1652     // performed by the current thread instead of a gang worker).
1653     //
1654     // The gang tasks involved in parallel reference processing create
1655     // their own instances of these closures, which do their own
1656     // synchronization among themselves.
1657     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1658     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1659 
1660     // We need at least one active thread. If reference processing
1661     // is not multi-threaded we use the current (VMThread) thread,
1662     // otherwise we use the work gang from the G1CollectedHeap and
1663     // we utilize all the worker threads we can.
1664     bool processing_is_mt = rp->processing_is_mt();
1665     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
1666     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
1667 
1668     // Parallel processing task executor.
1669     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
1670                                               g1h->workers(), active_workers);
1671     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1672 
1673     // Set the concurrency level. The phase was already set prior to
1674     // executing the remark task.
1675     set_concurrency(active_workers);
1676 
1677     // Set the degree of MT processing here.  If the discovery was done MT,
1678     // the number of threads involved during discovery could differ from
1679     // the number of active workers.  This is OK as long as the discovered
1680     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1681     rp->set_active_mt_degree(active_workers);
1682 
1683     // Process the weak references.
1684     const ReferenceProcessorStats& stats =
1685         rp->process_discovered_references(&g1_is_alive,
1686                                           &g1_keep_alive,
1687                                           &g1_drain_mark_stack,
1688                                           executor,
1689                                           _gc_timer_cm);
1690     _gc_tracer_cm->report_gc_reference_stats(stats);
1691 
1692     // The do_oop work routines of the keep_alive and drain_marking_stack
1693     // oop closures will set the has_overflown flag if we overflow the
1694     // global marking stack.
1695 
1696     assert(_global_mark_stack.overflow() || _global_mark_stack.is_empty(),
1697             "mark stack should be empty (unless it overflowed)");
1698 
1699     if (_global_mark_stack.overflow()) {
1700       // This should have been done already when we tried to push an
1701       // entry on to the global mark stack. But let's do it again.
1702       set_has_overflown();
1703     }
1704 
1705     assert(rp->num_q() == active_workers, "why not");
1706 
1707     rp->enqueue_discovered_references(executor);
1708 
1709     rp->verify_no_references_recorded();
1710     assert(!rp->discovery_enabled(), "Post condition");
1711   }
1712 
1713   if (has_overflown()) {
1714     // We cannot trust g1_is_alive if the marking stack overflowed
1715     return;
1716   }
1717 
1718   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1719 
1720   // Unload Klasses, String, Symbols, Code Cache, etc.
1721   if (ClassUnloadingWithConcurrentMark) {
1722     bool purged_classes;
1723 
1724     {
1725       GCTraceTime(Debug, gc, phases) trace("System Dictionary Unloading", _gc_timer_cm);
1726       purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
1727     }
1728 
1729     {
1730       GCTraceTime(Debug, gc, phases) trace("Parallel Unloading", _gc_timer_cm);
1731       weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
1732     }
1733   }
1734 
1735   if (G1StringDedup::is_enabled()) {
1736     GCTraceTime(Debug, gc, phases) trace("String Deduplication Unlink", _gc_timer_cm);
1737     G1StringDedup::unlink(&g1_is_alive);
1738   }
1739 }
1740 
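// Swap the roles of the previous and next marking bitmaps: the bitmap that
// was just populated by marking becomes the "previous" (read-only) view.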
1741 void G1ConcurrentMark::swapMarkBitMaps() {
1742   G1CMBitMapRO* temp = _prevMarkBitMap;
1743   _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
1744   _nextMarkBitMap    = (G1CMBitMap*)  temp;
1745 }
1746 
1747 // Closure for marking entries in SATB buffers.
1748 class G1CMSATBBufferClosure : public SATBBufferClosure {
1749 private:
1750   G1CMTask* _task;
1751   G1CollectedHeap* _g1h;
1752 
1753   // This is very similar to G1CMTask::deal_with_reference, but with
1754   // more relaxed requirements for the argument, so this must be more
1755   // circumspect about treating the argument as an object.
1756   void do_entry(void* entry) const {
1757     _task->increment_refs_reached();
1758     HeapRegion* hr = _g1h->heap_region_containing(entry);
1759     if (entry < hr->next_top_at_mark_start()) {
1760       // Until we get here, we don't know whether entry refers to a valid
1761       // object; it could instead have been a stale reference.
1762       oop obj = static_cast<oop>(entry);
1763       assert(obj->is_oop(true /* ignore mark word */),
1764              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
1765       _task->make_reference_grey(obj);
1766     }
1767   }
1768 
1769 public:
1770   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1771     : _task(task), _g1h(g1h) { }
1772 
1773   virtual void do_buffer(void** buffer, size_t size) {
1774     for (size_t i = 0; i < size; ++i) {
1775       do_entry(buffer[i]);
1776     }
1777   }
1778 };
1779 
1780 class G1RemarkThreadsClosure : public ThreadClosure {
1781   G1CMSATBBufferClosure _cm_satb_cl;
1782   G1CMOopClosure _cm_cl;
1783   MarkingCodeBlobClosure _code_cl;
1784   int _thread_parity;
1785 
1786  public:
1787   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1788     _cm_satb_cl(task, g1h),
1789     _cm_cl(g1h, g1h->concurrent_mark(), task),
1790     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1791     _thread_parity(Threads::thread_claim_parity()) {}
1792 
1793   void do_thread(Thread* thread) {
1794     if (thread->is_Java_thread()) {
1795       if (thread->claim_oops_do(true, _thread_parity)) {
1796         JavaThread* jt = (JavaThread*)thread;
1797 
1798         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1799         // however, oops reachable from nmethods have very complex lifecycles:
1800         // * Alive if on the stack of an executing method
1801         // * Weakly reachable otherwise
1802         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1803         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1804         jt->nmethods_do(&_code_cl);
1805 
1806         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1807       }
1808     } else if (thread->is_VM_thread()) {
1809       if (thread->claim_oops_do(true, _thread_parity)) {
1810         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1811       }
1812     }
1813   }
1814 };
1815 
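// Gang task used during remark: each worker first scans the thread roots and
// SATB buffers (via G1RemarkThreadsClosure) and then finishes marking with
// do_marking_step() until it either completes or the mark stack overflows.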
1816 class G1CMRemarkTask: public AbstractGangTask {
1817 private:
1818   G1ConcurrentMark* _cm;
1819 public:
1820   void work(uint worker_id) {
1821     // Since all available tasks are actually started, we should
1822     // only proceed if we're supposed to be active.
1823     if (worker_id < _cm->active_tasks()) {
1824       G1CMTask* task = _cm->task(worker_id);
1825       task->record_start_time();
1826       {
1827         ResourceMark rm;
1828         HandleMark hm;
1829 
1830         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1831         Threads::threads_do(&threads_f);
1832       }
1833 
1834       do {
1835         task->do_marking_step(1000000000.0 /* something very large */,
1836                               true         /* do_termination       */,
1837                               false        /* is_serial            */);
1838       } while (task->has_aborted() && !_cm->has_overflown());
1839       // If we overflow, then we do not want to restart. We instead
1840       // want to abort remark and do concurrent marking again.
1841       task->record_end_time();
1842     }
1843   }
1844 
1845   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1846     AbstractGangTask("Par Remark"), _cm(cm) {
1847     _cm->terminator()->reset_for_reuse(active_workers);
1848   }
1849 };
1850 
1851 void G1ConcurrentMark::checkpointRootsFinalWork() {
1852   ResourceMark rm;
1853   HandleMark   hm;
1854   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1855 
1856   GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1857 
1858   g1h->ensure_parsability(false);
1859 
1860   // this is remark, so we'll use up all active threads
1861   uint active_workers = g1h->workers()->active_workers();
1862   set_concurrency_and_phase(active_workers, false /* concurrent */);
1863   // Leave _parallel_marking_threads at its
1864   // value originally calculated in the G1ConcurrentMark
1865   // constructor and pass values of the active workers
1866   // through the gang in the task.
1867 
1868   {
1869     StrongRootsScope srs(active_workers);
1870 
1871     G1CMRemarkTask remarkTask(this, active_workers);
1872     // We will start all available threads, even if we decide that the
1873     // active_workers will be fewer. The extra ones will just bail out
1874     // immediately.
1875     g1h->workers()->run_task(&remarkTask);
1876   }
1877 
1878   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1879   guarantee(has_overflown() ||
1880             satb_mq_set.completed_buffers_num() == 0,
1881             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1882             BOOL_TO_STR(has_overflown()),
1883             satb_mq_set.completed_buffers_num());
1884 
1885   print_stats();
1886 }
1887 
1888 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
1889   // Note we are overriding the read-only view of the prev map here, via
1890   // the cast.
1891   ((G1CMBitMap*)_prevMarkBitMap)->clear_range(mr);
1892 }
1893 
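// Claims the next region to be scanned by atomically advancing the global
// finger (CAS on _finger). May return NULL even though regions may still be
// claimable, e.g. when the CAS loses a race or the claimed region turns out
// to be empty; callers are expected to retry and to use out_of_regions() to
// decide whether marking is actually finished.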
1894 HeapRegion*
1895 G1ConcurrentMark::claim_region(uint worker_id) {
1896   // "checkpoint" the finger
1897   HeapWord* finger = _finger;
1898 
1899   // _heap_end will not change underneath our feet; it only changes at
1900   // yield points.
1901   while (finger < _heap_end) {
1902     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1903 
1904     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1905 
1906     // Above, heap_region_containing may return NULL as we always scan and
1907     // claim until the end of the heap. In this case, just jump to the next region.
1908     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1909 
1910     // Is the gap between reading the finger and doing the CAS too long?
1911     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
1912     if (res == finger && curr_region != NULL) {
1913       // we succeeded
1914       HeapWord*   bottom        = curr_region->bottom();
1915       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1916 
1917       // notice that _finger == end cannot be guaranteed here since
1918       // someone else might have moved the finger even further
1919       assert(_finger >= end, "the finger should have moved forward");
1920 
1921       if (limit > bottom) {
1922         return curr_region;
1923       } else {
1924         assert(limit == bottom,
1925                "the region limit should be at bottom");
1926         // we return NULL and the caller should try calling
1927         // claim_region() again.
1928         return NULL;
1929       }
1930     } else {
1931       assert(_finger > finger, "the finger should have moved forward");
1932       // read it again
1933       finger = _finger;
1934     }
1935   }
1936 
1937   return NULL;
1938 }
1939 
1940 #ifndef PRODUCT
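// Debug-only check that no entries on the global mark stack, the task queues,
// or the global/task fingers point into the current collection set.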
1941 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1942 private:
1943   G1CollectedHeap* _g1h;
1944   const char* _phase;
1945   int _info;
1946 
1947 public:
1948   VerifyNoCSetOops(const char* phase, int info = -1) :
1949     _g1h(G1CollectedHeap::heap()),
1950     _phase(phase),
1951     _info(info)
1952   { }
1953 
1954   void operator()(oop obj) const {
1955     guarantee(obj->is_oop(),
1956               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1957               p2i(obj), _phase, _info);
1958     guarantee(!_g1h->obj_in_cs(obj),
1959               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1960               p2i(obj), _phase, _info);
1961   }
1962 };
1963 
1964 void G1ConcurrentMark::verify_no_cset_oops() {
1965   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1966   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
1967     return;
1968   }
1969 
1970   // Verify entries on the global mark stack
1971   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1972 
1973   // Verify entries on the task queues
1974   for (uint i = 0; i < _max_worker_id; ++i) {
1975     G1CMTaskQueue* queue = _task_queues->queue(i);
1976     queue->iterate(VerifyNoCSetOops("Queue", i));
1977   }
1978 
1979   // Verify the global finger
1980   HeapWord* global_finger = finger();
1981   if (global_finger != NULL && global_finger < _heap_end) {
1982     // Since we always iterate over all regions, we might get a NULL HeapRegion
1983     // here.
1984     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1985     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1986               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1987               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1988   }
1989 
1990   // Verify the task fingers
1991   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
1992   for (uint i = 0; i < parallel_marking_threads(); ++i) {
1993     G1CMTask* task = _tasks[i];
1994     HeapWord* task_finger = task->finger();
1995     if (task_finger != NULL && task_finger < _heap_end) {
1996       // See above note on the global finger verification.
1997       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
1998       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
1999                 !task_hr->in_collection_set(),
2000                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2001                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2002     }
2003   }
2004 }
2005 #endif // PRODUCT
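
// Thin wrappers delegating the card live data bookkeeping to G1RemSet.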
2006 void G1ConcurrentMark::create_live_data() {
2007   _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap);
2008 }
2009 
2010 void G1ConcurrentMark::finalize_live_data() {
2011   _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap);
2012 }
2013 
2014 void G1ConcurrentMark::verify_live_data() {
2015   _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap);
2016 }
2017 
2018 void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
2019   _g1h->g1_rem_set()->clear_card_live_data(workers);
2020 }
2021 
2022 #ifdef ASSERT
2023 void G1ConcurrentMark::verify_live_data_clear() {
2024   _g1h->g1_rem_set()->verify_card_live_data_is_clear();
2025 }
2026 #endif
2027 
2028 void G1ConcurrentMark::print_stats() {
2029   if (!log_is_enabled(Debug, gc, stats)) {
2030     return;
2031   }
2032   log_debug(gc, stats)("---------------------------------------------------------------------");
2033   for (size_t i = 0; i < _active_tasks; ++i) {
2034     _tasks[i]->print_stats();
2035     log_debug(gc, stats)("---------------------------------------------------------------------");
2036   }
2037 }
2038 
2039 void G1ConcurrentMark::abort() {
2040   if (!cmThread()->during_cycle() || _has_aborted) {
2041     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2042     return;
2043   }
2044 
2045   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2046   // concurrent bitmap clearing.
2047   {
2048     GCTraceTime(Debug, gc)("Clear Next Bitmap");
2049     clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
2050   }
2051   // Note we cannot clear the previous marking bitmap here
2052   // since VerifyDuringGC verifies the objects marked during
2053   // a full GC against the previous bitmap.
2054 
2055   {
2056     GCTraceTime(Debug, gc)("Clear Live Data");
2057     clear_live_data(_g1h->workers());
2058   }
2059   DEBUG_ONLY({
2060     GCTraceTime(Debug, gc)("Verify Live Data Clear");
2061     verify_live_data_clear();
2062   })
2063   // Empty mark stack
2064   reset_marking_state();
2065   for (uint i = 0; i < _max_worker_id; ++i) {
2066     _tasks[i]->clear_region_fields();
2067   }
2068   _first_overflow_barrier_sync.abort();
2069   _second_overflow_barrier_sync.abort();
2070   _has_aborted = true;
2071 
2072   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2073   satb_mq_set.abandon_partial_marking();
2074   // This can be called either during or outside marking, we'll read
2075   // the expected_active value from the SATB queue set.
2076   satb_mq_set.set_active_all_threads(
2077                                  false, /* new active value */
2078                                  satb_mq_set.is_active() /* expected_active */);
2079 }
2080 
2081 static void print_ms_time_info(const char* prefix, const char* name,
2082                                NumberSeq& ns) {
2083   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2084                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2085   if (ns.num() > 0) {
2086     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2087                            prefix, ns.sd(), ns.maximum());
2088   }
2089 }
2090 
2091 void G1ConcurrentMark::print_summary_info() {
2092   Log(gc, marking) log;
2093   if (!log.is_trace()) {
2094     return;
2095   }
2096 
2097   log.trace(" Concurrent marking:");
2098   print_ms_time_info("  ", "init marks", _init_times);
2099   print_ms_time_info("  ", "remarks", _remark_times);
2100   {
2101     print_ms_time_info("     ", "final marks", _remark_mark_times);
2102     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2103 
2104   }
2105   print_ms_time_info("  ", "cleanups", _cleanup_times);
2106   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2107             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2108   if (G1ScrubRemSets) {
2109     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2110               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2111   }
2112   log.trace("  Total stop_world time = %8.2f s.",
2113             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2114   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2115             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2116 }
2117 
2118 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2119   _parallel_workers->print_worker_threads_on(st);
2120 }
2121 
2122 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2123   _parallel_workers->threads_do(tc);
2124 }
2125 
2126 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2127   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2128       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2129   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2130   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2131 }
2132 
2133 // Closure for iteration over bitmaps
2134 class G1CMBitMapClosure : public BitMapClosure {
2135 private:
2136   // the bitmap that is being iterated over
2137   G1CMBitMap*                 _nextMarkBitMap;
2138   G1ConcurrentMark*           _cm;
2139   G1CMTask*                   _task;
2140 
2141 public:
2142   G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2143     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2144 
2145   bool do_bit(size_t offset) {
2146     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2147     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2148     assert(addr < _cm->finger(), "invariant");
2149     assert(addr >= _task->finger(), "invariant");
2150 
2151     // We move that task's local finger along.
2152     _task->move_finger_to(addr);
2153 
2154     _task->scan_object(oop(addr));
2155     // we only partially drain the local queue and global stack
2156     _task->drain_local_queue(true);
2157     _task->drain_global_stack(true);
2158 
2159     // if the has_aborted flag has been raised, we need to bail out of
2160     // the iteration
2161     return !_task->has_aborted();
2162   }
2163 };
2164 
2165 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2166   ReferenceProcessor* result = g1h->ref_processor_cm();
2167   assert(result != NULL, "CM reference processor should not be NULL");
2168   return result;
2169 }
2170 
2171 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2172                                G1ConcurrentMark* cm,
2173                                G1CMTask* task)
2174   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2175     _g1h(g1h), _cm(cm), _task(task)
2176 { }
2177 
2178 void G1CMTask::setup_for_region(HeapRegion* hr) {
2179   assert(hr != NULL,
2180         "claim_region() should have filtered out NULL regions");
2181   _curr_region  = hr;
2182   _finger       = hr->bottom();
2183   update_region_limit();
2184 }
2185 
2186 void G1CMTask::update_region_limit() {
2187   HeapRegion* hr            = _curr_region;
2188   HeapWord* bottom          = hr->bottom();
2189   HeapWord* limit           = hr->next_top_at_mark_start();
2190 
2191   if (limit == bottom) {
2192     // The region was collected underneath our feet.
2193     // We set the finger to bottom to ensure that the bitmap
2194     // iteration that will follow this will not do anything.
2195     // (this is not a condition that holds when we set the region up,
2196     // as the region is not supposed to be empty in the first place)
2197     _finger = bottom;
2198   } else if (limit >= _region_limit) {
2199     assert(limit >= _finger, "peace of mind");
2200   } else {
2201     assert(limit < _region_limit, "only way to get here");
2202     // This can happen under some pretty unusual circumstances.  An
2203     // evacuation pause empties the region underneath our feet (NTAMS
2204     // at bottom). We then do some allocation in the region (NTAMS
2205     // stays at bottom), followed by the region being used as a GC
2206     // alloc region (NTAMS will move to top() and the objects
2207     // originally below it will be grayed). All objects now marked in
2208     // the region are explicitly grayed, if below the global finger,
2209     // and we do not need in fact to scan anything else. So, we simply
2210     // set _finger to be limit to ensure that the bitmap iteration
2211     // doesn't do anything.
2212     _finger = limit;
2213   }
2214 
2215   _region_limit = limit;
2216 }
2217 
2218 void G1CMTask::giveup_current_region() {
2219   assert(_curr_region != NULL, "invariant");
2220   clear_region_fields();
2221 }
2222 
2223 void G1CMTask::clear_region_fields() {
2224   // Values for these three fields that indicate that we're not
2225   // holding on to a region.
2226   _curr_region   = NULL;
2227   _finger        = NULL;
2228   _region_limit  = NULL;
2229 }
2230 
2231 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2232   if (cm_oop_closure == NULL) {
2233     assert(_cm_oop_closure != NULL, "invariant");
2234   } else {
2235     assert(_cm_oop_closure == NULL, "invariant");
2236   }
2237   _cm_oop_closure = cm_oop_closure;
2238 }
2239 
2240 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2241   guarantee(nextMarkBitMap != NULL, "invariant");
2242   _nextMarkBitMap                = nextMarkBitMap;
2243   clear_region_fields();
2244 
2245   _calls                         = 0;
2246   _elapsed_time_ms               = 0.0;
2247   _termination_time_ms           = 0.0;
2248   _termination_start_time_ms     = 0.0;
2249 }
2250 
2251 bool G1CMTask::should_exit_termination() {
2252   regular_clock_call();
2253   // This is called when we are in the termination protocol. We should
2254   // quit if, for some reason, this task wants to abort or the global
2255   // stack is not empty (this means that we can get work from it).
2256   return !_cm->mark_stack_empty() || has_aborted();
2257 }
2258 
2259 void G1CMTask::reached_limit() {
2260   assert(_words_scanned >= _words_scanned_limit ||
2261          _refs_reached >= _refs_reached_limit ,
2262          "shouldn't have been called otherwise");
2263   regular_clock_call();
2264 }
2265 
2266 void G1CMTask::regular_clock_call() {
2267   if (has_aborted()) return;
2268 
2269   // First, we need to recalculate the words scanned and refs reached
2270   // limits for the next clock call.
2271   recalculate_limits();
2272 
2273   // During the regular clock call we do the following
2274 
2275   // (1) If an overflow has been flagged, then we abort.
2276   if (_cm->has_overflown()) {
2277     set_has_aborted();
2278     return;
2279   }
2280 
2281   // If we are not concurrent (i.e. we're doing remark) we don't need
2282   // to check anything else. The other steps are only needed during
2283   // the concurrent marking phase.
2284   if (!concurrent()) return;
2285 
2286   // (2) If marking has been aborted for Full GC, then we also abort.
2287   if (_cm->has_aborted()) {
2288     set_has_aborted();
2289     return;
2290   }
2291 
2292   double curr_time_ms = os::elapsedVTime() * 1000.0;
2293 
2294   // (3) We check whether we should yield. If we have to, then we abort.
2295   if (SuspendibleThreadSet::should_yield()) {
2296     // We should yield. To do this we abort the task. The caller is
2297     // responsible for yielding.
2298     set_has_aborted();
2299     return;
2300   }
2301 
2302   // (4) We check whether we've reached our time quota. If we have,
2303   // then we abort.
2304   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2305   if (elapsed_time_ms > _time_target_ms) {
2306     set_has_aborted();
2307     _has_timed_out = true;
2308     return;
2309   }
2310 
2311   // (5) Finally, we check whether there are enough completed SATB
2312   // buffers available for processing. If there are, we abort.
2313   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2314   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2315     // we do need to process SATB buffers, we'll abort and restart
2316     // the marking task to do so
2317     set_has_aborted();
2318     return;
2319   }
2320 }
2321 
2322 void G1CMTask::recalculate_limits() {
2323   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2324   _words_scanned_limit      = _real_words_scanned_limit;
2325 
2326   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2327   _refs_reached_limit       = _real_refs_reached_limit;
2328 }
2329 
2330 void G1CMTask::decrease_limits() {
2331   // This is called when we believe that we're going to do an infrequent
2332   // operation which will increase the per-byte scanning cost (i.e. move
2333   // entries to/from the global stack). It basically tries to decrease the
2334   // scanning limit so that the clock is called earlier.
2335 
2336   _words_scanned_limit = _real_words_scanned_limit -
2337     3 * words_scanned_period / 4;
2338   _refs_reached_limit  = _real_refs_reached_limit -
2339     3 * refs_reached_period / 4;
2340 }
2341 
2342 void G1CMTask::move_entries_to_global_stack() {
2343   // local array where we'll store the entries that will be popped
2344   // from the local queue
2345   oop buffer[global_stack_transfer_size];
2346 
2347   int n = 0;
2348   oop obj;
2349   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2350     buffer[n] = obj;
2351     ++n;
2352   }
2353 
2354   if (n > 0) {
2355     // we popped at least one entry from the local queue
2356 
2357     if (!_cm->mark_stack_push(buffer, n)) {
2358       set_has_aborted();
2359     }
2360   }
2361 
2362   // this operation was quite expensive, so decrease the limits
2363   decrease_limits();
2364 }
2365 
2366 void G1CMTask::get_entries_from_global_stack() {
2367   // local array where we'll store the entries that will be popped
2368   // from the global stack.
2369   oop buffer[global_stack_transfer_size];
2370   size_t n;
2371   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
2372   assert(n <= global_stack_transfer_size,
2373          "we should not pop more than the given limit");
2374   if (n > 0) {
2375     // yes, we did actually pop at least one entry
2376     for (size_t i = 0; i < n; ++i) {
2377       bool success = _task_queue->push(buffer[i]);
2378       // We only call this when the local queue is empty or under a
2379       // given target limit. So, we do not expect this push to fail.
2380       assert(success, "invariant");
2381     }
2382   }
2383 
2384   // this operation was quite expensive, so decrease the limits
2385   decrease_limits();
2386 }
2387 
2388 void G1CMTask::drain_local_queue(bool partially) {
2389   if (has_aborted()) return;
2390 
2391   // Decide what the target size is, depending whether we're going to
2392   // drain it partially (so that other tasks can steal if they run out
2393   // of things to do) or totally (at the very end).
2394   size_t target_size;
2395   if (partially) {
2396     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2397   } else {
2398     target_size = 0;
2399   }
2400 
2401   if (_task_queue->size() > target_size) {
2402     oop obj;
2403     bool ret = _task_queue->pop_local(obj);
2404     while (ret) {
2405       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
2406       assert(!_g1h->is_on_master_free_list(
2407                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2408 
2409       scan_object(obj);
2410 
2411       if (_task_queue->size() <= target_size || has_aborted()) {
2412         ret = false;
2413       } else {
2414         ret = _task_queue->pop_local(obj);
2415       }
2416     }
2417   }
2418 }
2419 
2420 void G1CMTask::drain_global_stack(bool partially) {
2421   if (has_aborted()) return;
2422 
2423   // We have a policy to drain the local queue before we attempt to
2424   // drain the global stack.
2425   assert(partially || _task_queue->size() == 0, "invariant");
2426 
2427   // Decide what the target size is, depending whether we're going to
2428   // drain it partially (so that other tasks can steal if they run out
2429   // of things to do) or totally (at the very end).  Notice that,
2430   // because we move entries from the global stack in chunks or
2431   // because another task might be doing the same, we might in fact
2432   // drop below the target. But, this is not a problem.
2433   size_t target_size;
2434   if (partially) {
2435     target_size = _cm->partial_mark_stack_size_target();
2436   } else {
2437     target_size = 0;
2438   }
2439 
2440   if (_cm->mark_stack_size() > target_size) {
2441     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2442       get_entries_from_global_stack();
2443       drain_local_queue(partially);
2444     }
2445   }
2446 }
2447 
2448 // The SATB queue code has several assumptions about whether to call the par or
2449 // non-par versions of the methods. This is why some of the code is
2450 // replicated. We should really get rid of the single-threaded version
2451 // of the code to simplify things.
2452 void G1CMTask::drain_satb_buffers() {
2453   if (has_aborted()) return;
2454 
2455   // We set this so that the regular clock knows that we're in the
2456   // middle of draining buffers and doesn't set the abort flag when it
2457   // notices that SATB buffers are available for draining. It'd be
2458   // very counterproductive if it did that. :-)
2459   _draining_satb_buffers = true;
2460 
2461   G1CMSATBBufferClosure satb_cl(this, _g1h);
2462   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2463 
2464   // This keeps claiming and applying the closure to completed buffers
2465   // until we run out of buffers or we need to abort.
2466   while (!has_aborted() &&
2467          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2468     regular_clock_call();
2469   }
2470 
2471   _draining_satb_buffers = false;
2472 
2473   assert(has_aborted() ||
2474          concurrent() ||
2475          satb_mq_set.completed_buffers_num() == 0, "invariant");
2476 
2477   // again, this was a potentially expensive operation, decrease the
2478   // limits to get the regular clock call early
2479   decrease_limits();
2480 }
2481 
2482 void G1CMTask::print_stats() {
2483   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2484                        _worker_id, _calls);
2485   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2486                        _elapsed_time_ms, _termination_time_ms);
2487   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2488                        _step_times_ms.num(), _step_times_ms.avg(),
2489                        _step_times_ms.sd());
2490   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
2491                        _step_times_ms.maximum(), _step_times_ms.sum());
2492 }
2493 
2494 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
2495   return _task_queues->steal(worker_id, hash_seed, obj);
2496 }
2497 
2498 /*****************************************************************************
2499 
2500     The do_marking_step(time_target_ms, ...) method is the building
2501     block of the parallel marking framework. It can be called in parallel
2502     with other invocations of do_marking_step() on different tasks
2503     (but only one per task, obviously) and concurrently with the
2504     mutator threads, or during remark, hence it eliminates the need
2505     for two versions of the code. When called during remark, it will
2506     pick up from where the task left off during the concurrent marking
2507     phase. Interestingly, tasks are also claimable during evacuation
2508     pauses, since do_marking_step() ensures that it aborts before
2509     it needs to yield.
2510 
2511     The data structures that it uses to do marking work are the
2512     following:
2513 
2514       (1) Marking Bitmap. If there are gray objects that appear only
2515       on the bitmap (this happens either when dealing with an overflow
2516       or when the initial marking phase has simply marked the roots
2517       and didn't push them on the stack), then tasks claim heap
2518       regions whose bitmap they then scan to find gray objects. A
2519       global finger indicates where the end of the last claimed region
2520       is. A local finger indicates how far into the region a task has
2521       scanned. The two fingers are used to determine how to gray an
2522       object (i.e. whether simply marking it is OK, as it will be
2523       visited by a task in the future, or whether it also needs to be
2524       pushed on a stack).
2525 
2526       (2) Local Queue. The local queue of the task which is accessed
2527       reasonably efficiently by the task. Other tasks can steal from
2528       it when they run out of work. Throughout the marking phase, a
2529       task attempts to keep its local queue short but not totally
2530       empty, so that entries are available for stealing by other
2531       tasks. Only when there is no more work will a task totally
2532       drain its local queue.
2533 
2534       (3) Global Mark Stack. This handles local queue overflow. During
2535       marking only sets of entries are moved between it and the local
2536       queues, as access to it requires a mutex and more fine-grained
2537       interaction with it, which might cause contention. If it
2538       overflows, then the marking phase should restart and iterate
2539       over the bitmap to identify gray objects. Throughout the marking
2540       phase, tasks attempt to keep the global mark stack at a small
2541       length but not totally empty, so that entries are available for
2542       popping by other tasks. Only when there is no more work will tasks
2543       totally drain the global mark stack.
2544 
2545       (4) SATB Buffer Queue. This is where completed SATB buffers are
2546       made available. Buffers are regularly removed from this queue
2547       and scanned for roots, so that the queue doesn't get too
2548       long. During remark, all completed buffers are processed, as
2549       well as the filled in parts of any uncompleted buffers.
2550 
2551     The do_marking_step() method tries to abort when the time target
2552     has been reached. There are a few other cases when the
2553     do_marking_step() method also aborts:
2554 
2555       (1) When the marking phase has been aborted (after a Full GC).
2556 
2557       (2) When a global overflow (on the global stack) has been
2558       triggered. Before the task aborts, it will actually sync up with
2559       the other tasks to ensure that all the marking data structures
2560       (local queues, stacks, fingers etc.)  are re-initialized so that
2561       when do_marking_step() completes, the marking phase can
2562       immediately restart.
2563 
2564       (3) When enough completed SATB buffers are available. The
2565       do_marking_step() method only tries to drain SATB buffers right
2566       at the beginning. So, if enough buffers are available, the
2567       marking step aborts and the SATB buffers are processed at
2568       the beginning of the next invocation.
2569 
2570       (4) To yield. When we have to yield, we abort and yield
2571       right at the end of do_marking_step(). This saves us from a lot
2572       of hassle as, by yielding we might allow a Full GC. If this
2573       happens then objects will be compacted underneath our feet, the
2574       heap might shrink, etc. We save checking for this by just
2575       aborting and doing the yield right at the end.
2576 
2577     From the above it follows that the do_marking_step() method should
2578     be called in a loop (or, otherwise, regularly) until it completes.
2579 
2580     If a marking step completes without its has_aborted() flag being
2581     true, it means it has completed the current marking phase (and
2582     also all other marking tasks have done so and have all synced up).
2583 
2584     A method called regular_clock_call() is invoked "regularly" (in
2585     sub-ms intervals) throughout marking. It is this clock method that
2586     checks all the abort conditions which were mentioned above and
2587     decides when the task should abort. A work-based scheme is used to
2588     trigger this clock method: when the number of object words the
2589     marking phase has scanned or the number of references the marking
2590     phase has visited reach a given limit. Additional invocations of
2591     the clock method have been planted in a few other strategic places
2592     too. The initial reason for the clock method was to avoid calling
2593     vtime too regularly, as it is quite expensive. So, once it was in
2594     place, it was natural to piggy-back all the other conditions on it
2595     too and not constantly check them throughout the code.
2596 
2597     If do_termination is true then do_marking_step will enter its
2598     termination protocol.
2599 
2600     The value of is_serial must be true when do_marking_step is being
2601     called serially (i.e. by the VMThread) and do_marking_step should
2602     skip any synchronization in the termination and overflow code.
2603     Examples include the serial remark code and the serial reference
2604     processing closures.
2605 
2606     The value of is_serial must be false when do_marking_step is
2607     being called by any of the worker threads in a work gang.
2608     Examples include the concurrent marking code (CMMarkingTask),
2609     the MT remark code, and the MT reference processing closures.
2610 
2611  *****************************************************************************/
2612 
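// Illustrative call pattern (this mirrors the loops used by the remark task
// and the reference-processing closures above); target_ms, do_termination and
// is_serial are chosen by the caller:
//
//   do {
//     task->do_marking_step(target_ms, do_termination, is_serial);
//   } while (task->has_aborted() && !cm->has_overflown());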
2613 void G1CMTask::do_marking_step(double time_target_ms,
2614                                bool do_termination,
2615                                bool is_serial) {
2616   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2617   assert(concurrent() == _cm->concurrent(), "they should be the same");
2618 
2619   G1Policy* g1_policy = _g1h->g1_policy();
2620   assert(_task_queues != NULL, "invariant");
2621   assert(_task_queue != NULL, "invariant");
2622   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
2623 
2624   assert(!_claimed,
2625          "only one thread should claim this task at any one time");
2626 
2627   // OK, this doesn't safeguard against all possible scenarios, as it is
2628   // possible for two threads to set the _claimed flag at the same
2629   // time. But it is only for debugging purposes anyway and it will
2630   // catch most problems.
2631   _claimed = true;
2632 
2633   _start_time_ms = os::elapsedVTime() * 1000.0;
2634 
2635   // If do_stealing is true then do_marking_step will attempt to
2636   // steal work from the other G1CMTasks. It only makes sense to
2637   // enable stealing when the termination protocol is enabled
2638   // and do_marking_step() is not being called serially.
2639   bool do_stealing = do_termination && !is_serial;
2640 
2641   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2642   _time_target_ms = time_target_ms - diff_prediction_ms;
2643 
2644   // set up the variables that are used in the work-based scheme to
2645   // call the regular clock method
2646   _words_scanned = 0;
2647   _refs_reached  = 0;
2648   recalculate_limits();
2649 
2650   // clear all flags
2651   clear_has_aborted();
2652   _has_timed_out = false;
2653   _draining_satb_buffers = false;
2654 
2655   ++_calls;
2656 
2657   // Set up the bitmap and oop closures. Anything that uses them is
2658   // eventually called from this method, so it is OK to allocate these
2659   // statically.
2660   G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
2661   G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
2662   set_cm_oop_closure(&cm_oop_closure);
2663 
2664   if (_cm->has_overflown()) {
2665     // This can happen if the mark stack overflows during a GC pause
2666     // and this task, after a yield point, restarts. We have to abort
2667     // as we need to get into the overflow protocol which happens
2668     // right at the end of this task.
2669     set_has_aborted();
2670   }
2671 
2672   // First drain any available SATB buffers. After this, we will not
2673   // look at SATB buffers before the next invocation of this method.
2674   // If enough completed SATB buffers are queued up, the regular clock
2675   // will abort this task so that it restarts.
2676   drain_satb_buffers();
2677   // ...then partially drain the local queue and the global stack
2678   drain_local_queue(true);
2679   drain_global_stack(true);
2680 
2681   do {
2682     if (!has_aborted() && _curr_region != NULL) {
2683       // This means that we're already holding on to a region.
2684       assert(_finger != NULL, "if region is not NULL, then the finger "
2685              "should not be NULL either");
2686 
2687       // We might have restarted this task after an evacuation pause
2688       // which might have evacuated the region we're holding on to
2689       // underneath our feet. Let's read its limit again to make sure
2690       // that we do not iterate over a region of the heap that
2691       // contains garbage (update_region_limit() will also move
2692       // _finger to the start of the region if it is found empty).
2693       update_region_limit();
2694       // We will start from _finger not from the start of the region,
2695       // as we might be restarting this task after aborting half-way
2696       // through scanning this region. In this case, _finger points to
2697       // the address where we last found a marked object. If this is a
2698       // fresh region, _finger points to start().
2699       MemRegion mr = MemRegion(_finger, _region_limit);
2700 
2701       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2702              "humongous regions should go around loop once only");
2703 
2704       // Some special cases:
2705       // If the memory region is empty, we can just give up the region.
2706       // If the current region is humongous then we only need to check
2707       // the bitmap for the bit associated with the start of the object,
2708       // scan the object if it's live, and give up the region.
2709       // Otherwise, let's iterate over the bitmap of the part of the region
2710       // that is left.
2711       // If the iteration is successful, give up the region.
2712       if (mr.is_empty()) {
2713         giveup_current_region();
2714         regular_clock_call();
2715       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2716         if (_nextMarkBitMap->isMarked(mr.start())) {
2717           // The object is marked - apply the closure
2718           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
2719           bitmap_closure.do_bit(offset);
2720         }
2721         // Even if this task aborted while scanning the humongous object
2722         // we can (and should) give up the current region.
2723         giveup_current_region();
2724         regular_clock_call();
2725       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
2726         giveup_current_region();
2727         regular_clock_call();
2728       } else {
2729         assert(has_aborted(), "currently the only way to do so");
2730         // The only way to abort the bitmap iteration is to return
2731         // false from the do_bit() method. However, inside the
2732         // do_bit() method we move the _finger to point to the
2733         // object currently being looked at. So, if we bail out, we
2734         // have definitely set _finger to something non-null.
2735         assert(_finger != NULL, "invariant");
2736 
2737         // Region iteration was actually aborted. So now _finger
2738         // points to the address of the object we last scanned. If we
2739         // leave it there, when we restart this task, we will rescan
2740         // the object. It is easy to avoid this. We move the finger by
2741         // enough to point to the next possible object header (the
2742         // bitmap knows by how much we need to move it as it knows its
2743         // granularity).
2744         assert(_finger < _region_limit, "invariant");
2745         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
2746         // Check if bitmap iteration was aborted while scanning the last object
2747         if (new_finger >= _region_limit) {
2748           giveup_current_region();
2749         } else {
2750           move_finger_to(new_finger);
2751         }
2752       }
2753     }
2754     // At this point we have either completed iterating over the
2755     // region we were holding on to, or we have aborted.
2756 
2757     // We then partially drain the local queue and the global stack.
2758     // (Do we really need this?)
2759     drain_local_queue(true);
2760     drain_global_stack(true);
2761 
2762     // Read the note on the claim_region() method about why it might
2763     // return NULL with potentially more regions available for
2764     // claiming and why we have to check out_of_regions() to determine
2765     // whether we're done or not.
2766     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2767       // We are going to try to claim a new region. We should have
2768       // given up on the previous one.
2769       // Separated the asserts so that we know which one fires.
2770       assert(_curr_region  == NULL, "invariant");
2771       assert(_finger       == NULL, "invariant");
2772       assert(_region_limit == NULL, "invariant");
2773       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2774       if (claimed_region != NULL) {
2775         // Yes, we managed to claim one
2776         setup_for_region(claimed_region);
2777         assert(_curr_region == claimed_region, "invariant");
2778       }
2779       // It is important to call the regular clock here. It might take
2780       // a while to claim a region if, for example, we hit a large
2781       // block of empty regions. So we need to call the regular clock
2782       // method once per iteration of this loop to make sure it is
2783       // called frequently enough.
2784       regular_clock_call();
2785     }
2786 
2787     if (!has_aborted() && _curr_region == NULL) {
2788       assert(_cm->out_of_regions(),
2789              "at this point we should be out of regions");
2790     }
2791   } while (_curr_region != NULL && !has_aborted());
2792 
2793   if (!has_aborted()) {
2794     // We cannot check whether the global stack is empty, since other
2795     // tasks might be pushing objects to it concurrently.
2796     assert(_cm->out_of_regions(),
2797            "at this point we should be out of regions");
2798     // Try to reduce the number of available SATB buffers so that
2799     // remark has less work to do.
2800     drain_satb_buffers();
2801   }
2802 
2803   // Since we've done everything else, we can now totally drain the
2804   // local queue and global stack.
2805   drain_local_queue(false);
2806   drain_global_stack(false);
2807 
2808   // Attempt to steal work from other tasks' queues.
2809   if (do_stealing && !has_aborted()) {
2810     // We have not aborted. This means that we have finished all that
2811     // we could. Let's try to do some stealing...
2812 
2813     // We cannot check whether the global stack is empty, since other
2814     // tasks might be pushing objects to it concurrently.
2815     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2816            "only way to reach here");
2817     while (!has_aborted()) {
2818       oop obj;
2819       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
2820         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
2821                "any stolen object should be marked");
2822         scan_object(obj);
2823 
2824         // And since we're towards the end, let's totally drain the
2825         // local queue and global stack.
2826         drain_local_queue(false);
2827         drain_global_stack(false);
2828       } else {
2829         break;
2830       }
2831     }
2832   }
2833 
2834   // If we have not aborted by now, try to enter the termination
2835   // protocol (if requested via do_termination).
2836   if (do_termination && !has_aborted()) {
2837     // We cannot check whether the global stack is empty, since other
2838     // tasks might be concurrently pushing objects on it.
2839     // Separated the asserts so that we know which one fires.
2840     assert(_cm->out_of_regions(), "only way to reach here");
2841     assert(_task_queue->size() == 0, "only way to reach here");
2842     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
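         // os::elapsedVTime() returns seconds, hence the * 1000.0 to obtain a
         // timestamp in ms.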
2843 
2844     // The G1CMTask class also extends the TerminatorTerminator class,
2845     // so its should_exit_termination() method also decides
2846     // whether or not to exit the termination protocol.
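         // In the serial case there is no other task to synchronize with, so
         // this task can consider itself finished right away without offering
         // termination.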
2847     bool finished = (is_serial ||
2848                      _cm->terminator()->offer_termination(this));
2849     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2850     _termination_time_ms +=
2851       termination_end_time_ms - _termination_start_time_ms;
2852 
2853     if (finished) {
2854       // We're all done.
2855 
2856       if (_worker_id == 0) {
2857         // let's allow task 0 to do this
2858         if (concurrent()) {
2859           assert(_cm->concurrent_marking_in_progress(), "invariant");
2860           // we need to set this to false before the next
2861           // safepoint. This way we ensure that the marking phase
2862           // doesn't observe any more heap expansions.
2863           _cm->clear_concurrent_marking_in_progress();
2864         }
2865       }
2866 
2867       // We can now guarantee that the global stack is empty, since
2868       // all other tasks have finished. We separated the guarantees so
2869       // that, if a condition is false, we can immediately find out
2870       // which one.
2871       guarantee(_cm->out_of_regions(), "only way to reach here");
2872       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2873       guarantee(_task_queue->size() == 0, "only way to reach here");
2874       guarantee(!_cm->has_overflown(), "only way to reach here");
2875       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
2876     } else {
2877       // Apparently there's more work to do. Let's abort this task. The
2878       // caller will restart it and we can hopefully find more things to do.
2879       set_has_aborted();
2880     }
2881   }
2882 
2883   // Mainly for debugging purposes to make sure that a pointer to the
2884   // closure which was stack-allocated in this frame doesn't
2885   // escape it by accident.
2886   set_cm_oop_closure(NULL);
2887   double end_time_ms = os::elapsedVTime() * 1000.0;
2888   double elapsed_time_ms = end_time_ms - _start_time_ms;
2889   // Update the step history.
2890   _step_times_ms.add(elapsed_time_ms);
2891 
2892   if (has_aborted()) {
2893     // The task was aborted for some reason.
2894     if (_has_timed_out) {
2895       double diff_ms = elapsed_time_ms - _time_target_ms;
2896       // Keep statistics of how well we did with respect to hitting
2897       // our target only if we actually timed out (if we aborted for
2898       // other reasons, then the results might get skewed).
2899       _marking_step_diffs_ms.add(diff_ms);
2900     }
2901 
2902     if (_cm->has_overflown()) {
2903       // This is the interesting one. We aborted because a global
2904       // overflow was raised. This means we have to restart the
2905       // marking phase and start iterating over regions. However, in
2906       // order to do this we have to make sure that all tasks stop
2907       // what they are doing and re-initialize in a safe manner. We
2908       // will achieve this with the use of two barrier sync points.
2909 
2910       if (!is_serial) {
2911         // We only need to enter the sync barrier if being called
2912         // from a parallel context
2913         _cm->enter_first_sync_barrier(_worker_id);
2914 
2915         // When we exit this sync barrier we know that all tasks have
2916         // stopped doing marking work. So, it's now safe to
2917         // re-initialize our data structures. At the end of this method,
2918         // task 0 will clear the global data structures.
2919       }
2920 
2921       // We clear the local state of this task...
2922       clear_region_fields();
2923 
2924       if (!is_serial) {
2925         // ...and enter the second barrier.
2926         _cm->enter_second_sync_barrier(_worker_id);
2927       }
2928       // At this point, if we're in the concurrent phase of
2929       // marking, everything has been re-initialized and we're
2930       // ready to restart.
2931     }
2932   }
2933 
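       // This invocation of the marking step is done; clear this task's
       // claimed flag.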
2934   _claimed = false;
2935 }
2936 
2937 G1CMTask::G1CMTask(uint worker_id,
2938                    G1ConcurrentMark* cm,
2939                    G1CMTaskQueue* task_queue,
2940                    G1CMTaskQueueSet* task_queues)
2941   : _g1h(G1CollectedHeap::heap()),
2942     _worker_id(worker_id), _cm(cm),
2943     _claimed(false),
2944     _nextMarkBitMap(NULL), _hash_seed(17),
2945     _task_queue(task_queue),
2946     _task_queues(task_queues),
2947     _cm_oop_closure(NULL) {
2948   guarantee(task_queue != NULL, "invariant");
2949   guarantee(task_queues != NULL, "invariant");
2950 
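       // Seed the marking step overshoot statistics with an initial sample so
       // that the very first prediction in do_marking_step() has data to use.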
2951   _marking_step_diffs_ms.add(0.5);
2952 }
2953 
2954 // These are formatting macros that are used below to ensure
2955 // consistent formatting. The *_H_* versions are used to format the
2956 // header for a particular value and they should be kept consistent
2957 // with the corresponding macro. Also note that most of the macros add
2958 // the necessary white space (as a prefix) which makes them a bit
2959 // easier to compose.
2960 
2961 // All the output lines are prefixed with this string to be able to
2962 // identify them easily in a large log file.
2963 #define G1PPRL_LINE_PREFIX            "###"
2964 
2965 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2966 #ifdef _LP64
2967 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2968 #else // _LP64
2969 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2970 #endif // _LP64
2971 
2972 // For per-region info
2973 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2974 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2975 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
2976 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2977 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
2978 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
2979 
2980 // For summary info
2981 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2982 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
2983 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2984 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
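     // As a sketch of how a value/header pair lines up: G1PPRL_BYTE_H_FORMAT
     // expands to "  %9s" and G1PPRL_BYTE_FORMAT to "  " SIZE_FORMAT_W(9), so
     // both print into the same two-space-prefixed, width-9 field and each
     // header built with a *_H_* macro sits exactly above the values printed
     // with its counterpart.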
2985 
2986 G1PrintRegionLivenessInfoClosure::
2987 G1PrintRegionLivenessInfoClosure(const char* phase_name)
2988   : _total_used_bytes(0), _total_capacity_bytes(0),
2989     _total_prev_live_bytes(0), _total_next_live_bytes(0),
2990     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
2991   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2992   MemRegion g1_reserved = g1h->g1_reserved();
2993   double now = os::elapsedTime();
2994 
2995   // Print the header of the output.
2996   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
2997   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
2998                           G1PPRL_SUM_ADDR_FORMAT("reserved")
2999                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3000                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3001                           HeapRegion::GrainBytes);
3002   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3003   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3004                           G1PPRL_TYPE_H_FORMAT
3005                           G1PPRL_ADDR_BASE_H_FORMAT
3006                           G1PPRL_BYTE_H_FORMAT
3007                           G1PPRL_BYTE_H_FORMAT
3008                           G1PPRL_BYTE_H_FORMAT
3009                           G1PPRL_DOUBLE_H_FORMAT
3010                           G1PPRL_BYTE_H_FORMAT
3011                           G1PPRL_BYTE_H_FORMAT,
3012                           "type", "address-range",
3013                           "used", "prev-live", "next-live", "gc-eff",
3014                           "remset", "code-roots");
3015   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3016                           G1PPRL_TYPE_H_FORMAT
3017                           G1PPRL_ADDR_BASE_H_FORMAT
3018                           G1PPRL_BYTE_H_FORMAT
3019                           G1PPRL_BYTE_H_FORMAT
3020                           G1PPRL_BYTE_H_FORMAT
3021                           G1PPRL_DOUBLE_H_FORMAT
3022                           G1PPRL_BYTE_H_FORMAT
3023                           G1PPRL_BYTE_H_FORMAT,
3024                           "", "",
3025                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3026                           "(bytes)", "(bytes)");
3027 }
3028 
3029 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3030   const char* type       = r->get_type_str();
3031   HeapWord* bottom       = r->bottom();
3032   HeapWord* end          = r->end();
3033   size_t capacity_bytes  = r->capacity();
3034   size_t used_bytes      = r->used();
3035   size_t prev_live_bytes = r->live_bytes();
3036   size_t next_live_bytes = r->next_live_bytes();
3037   double gc_eff          = r->gc_efficiency();
3038   size_t remset_bytes    = r->rem_set()->mem_size();
3039   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
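       // 'prev' liveness is based on the last completed marking, while 'next'
       // liveness is based on the marking currently in progress.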
3040 
3041   _total_used_bytes      += used_bytes;
3042   _total_capacity_bytes  += capacity_bytes;
3043   _total_prev_live_bytes += prev_live_bytes;
3044   _total_next_live_bytes += next_live_bytes;
3045   _total_remset_bytes    += remset_bytes;
3046   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3047 
3048   // Print a line for this particular region.
3049   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3050                           G1PPRL_TYPE_FORMAT
3051                           G1PPRL_ADDR_BASE_FORMAT
3052                           G1PPRL_BYTE_FORMAT
3053                           G1PPRL_BYTE_FORMAT
3054                           G1PPRL_BYTE_FORMAT
3055                           G1PPRL_DOUBLE_FORMAT
3056                           G1PPRL_BYTE_FORMAT
3057                           G1PPRL_BYTE_FORMAT,
3058                           type, p2i(bottom), p2i(end),
3059                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3060                           remset_bytes, strong_code_roots_bytes);
3061 
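       // Returning false keeps the heap region iteration going over the
       // remaining regions.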
3062   return false;
3063 }
3064 
3065 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3066   // Add the free-list and static memory overhead of the remembered sets to the total.
3067   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3068   // Print the footer of the output.
3069   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3070   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3071                          " SUMMARY"
3072                          G1PPRL_SUM_MB_FORMAT("capacity")
3073                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3074                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3075                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3076                          G1PPRL_SUM_MB_FORMAT("remset")
3077                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3078                          bytes_to_mb(_total_capacity_bytes),
3079                          bytes_to_mb(_total_used_bytes),
3080                          perc(_total_used_bytes, _total_capacity_bytes),
3081                          bytes_to_mb(_total_prev_live_bytes),
3082                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3083                          bytes_to_mb(_total_next_live_bytes),
3084                          perc(_total_next_live_bytes, _total_capacity_bytes),
3085                          bytes_to_mb(_total_remset_bytes),
3086                          bytes_to_mb(_total_strong_code_roots_bytes));
3087 }