/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

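// The bitmap uses one bit per MinObjAlignment-sized chunk of the heap, so
// each bitmap byte covers mark_distance() bytes of heap (e.g. 64 heap bytes
// per bitmap byte with 8-byte minimum object alignment).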
size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer     _hrclaimer;
  bool                  _suspendible; // If the task is suspendible, workers must join the STS.

public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflowed the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    // Failed to double capacity, continue;
    log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
                  _capacity / K, new_capacity / K);
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

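// Push n entries onto the global mark stack. This is a rare, lock-protected
// path; if the stack does not have room for all n entries we set _overflow
// and push nothing.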
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

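// Pop up to max entries from the global mark stack into ptr_arr, returning
// the number popped in *n. Returns false if the stack was empty.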
bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint  new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

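// Root region bookkeeping. Currently only survivor regions can be root
// regions; they are handed out one at a time via claim_next() and the
// scan is tracked with _scan_in_progress / _should_abort.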
CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false),  _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

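// Use roughly one marking thread per four parallel GC threads, with a
// minimum of one.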
uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
      vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
                                              (double) os::processor_count();
    double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor             = sleep_factor;
    _marking_task_overhead    = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
                   (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

  _parallel_workers = new WorkGang("G1 Marker",
       _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                                CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to force one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      log_info(gc)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

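// The gang task that drives the concurrent phase of marking. Each worker
// joins the suspendible thread set and repeatedly calls do_marking_step()
// with a time budget of G1ConcMarkStepDurationMillis, yielding to safepoints
// and (if a marking overhead target is in effect) sleeping according to
// sleep_factor() between steps, until marking completes or is aborted.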
class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

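// Scan a single root region by applying the root region scan closure to
// every object between bottom() and top(), prefetching ahead as we go.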
void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

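// Gang task for the concurrent root region scan: each worker repeatedly
// claims the next unclaimed root region and scans it until none remain.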
class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  jlong scan_start = os::elapsed_counter();

  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    log_info(gc)("Concurrent Root Region Scan (%.3fs)", TimeHelper::counter_to_seconds(scan_start));

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    jlong scan_end = os::elapsed_counter();
    log_info(gc)("Concurrent Root Region Scan (%.3fs, %.3fs) %.3fms",
                 TimeHelper::counter_to_seconds(scan_start),
                 TimeHelper::counter_to_seconds(scan_end),
                 TimeHelper::counter_to_millis(scan_end - scan_start));
    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
    "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops.  We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;
    log_develop(gc)("Remark led to restart for overflow.");

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      GCTraceTime(Debug, gc) trace("GC aggregate-data", g1h->gc_timer_cm());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  void set_bit_for_region(HeapRegion* hr) {
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    _region_bm->par_at_put(index, true);
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // This will happen if we are handling a humongous object that spans
      // several heap regions.
      if (obj_end > hr->end()) {
        break;
      }
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    if (exp_marked_bytes > act_marked_bytes) {
      if (hr->is_starts_humongous()) {
        // For start_humongous regions, the size of the whole object will be
        // in exp_marked_bytes.
        HeapRegion* region = hr;
        int num_regions;
        for (num_regions = 0; region != NULL; num_regions++) {
          region = _g1h->next_region_in_humongous(region);
        }
        if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
          failures += 1;
        } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
          failures += 1;
        }
      } else {
        // We're not OK if expected marked bytes > actual marked bytes. It means
        // we have missed accounting for some objects during the actual marking.
1409         failures += 1;
1410       }
1411     }
1412 
1413     // Verify the bit, for this region, in the actual and expected
1414     // (which was just calculated) region bit maps.
1415     // We're not OK if the bit in the calculated expected region
1416     // bitmap is set and the bit in the actual region bitmap is not.
1417     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1418 
1419     bool expected = _exp_region_bm->at(index);
1420     bool actual = _region_bm->at(index);
1421     if (expected && !actual) {
1422       failures += 1;
1423     }
1424 
1425     // Verify that the card bit maps for the cards spanned by the current
1426     // region match. We have an error if we have a set bit in the expected
1427     // bit map and the corresponding bit in the actual bitmap is not set.
1428 
1429     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1430     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1431 
1432     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1433       expected = _exp_card_bm->at(i);
1434       actual = _card_bm->at(i);
1435 
1436       if (expected && !actual) {
1437         failures += 1;
1438       }
1439     }
1440 
1441     _failures += failures;
1442 
1443     // We could stop iteration over the heap when we
1444     // find the first violating region by returning true.
1445     return false;
1446   }
1447 };
1448 
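     // Gang task used to verify, in parallel, the liveness counting data
     // accumulated during marking: each worker applies VerifyLiveObjectDataHRClosure
     // to a subset of the heap regions and the failure counts are accumulated.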
1449 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1450 protected:
1451   G1CollectedHeap* _g1h;
1452   ConcurrentMark* _cm;
1453   BitMap* _actual_region_bm;
1454   BitMap* _actual_card_bm;
1455 
1456   uint    _n_workers;
1457 
1458   BitMap* _expected_region_bm;
1459   BitMap* _expected_card_bm;
1460 
1461   int  _failures;
1462 
1463   HeapRegionClaimer _hrclaimer;
1464 
1465 public:
1466   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1467                             BitMap* region_bm, BitMap* card_bm,
1468                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1469     : AbstractGangTask("G1 verify final counting"),
1470       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1471       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1472       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1473       _failures(0),
1474       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1475     assert(VerifyDuringGC, "don't call this otherwise");
1476     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1477     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1478   }
1479 
1480   void work(uint worker_id) {
1481     assert(worker_id < _n_workers, "invariant");
1482 
1483     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1484                                             _actual_region_bm, _actual_card_bm,
1485                                             _expected_region_bm,
1486                                             _expected_card_bm);
1487 
1488     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1489 
1490     Atomic::add(verify_cl.failures(), &_failures);
1491   }
1492 
1493   int failures() const { return _failures; }
1494 };
1495 
1496 // Closure that finalizes the liveness counting data.
1497 // Used during the cleanup pause.
1498 // Sets the bits corresponding to the interval [NTAMS, top]
1499 // (which contains the implicitly live objects) in the
1500 // card liveness bitmap. Also sets the bit in the region liveness
1501 // bitmap for each region that contains live data.
1502 
1503 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1504  public:
1505   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1506                               BitMap* region_bm,
1507                               BitMap* card_bm) :
1508     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1509 
1510   bool doHeapRegion(HeapRegion* hr) {
1511     HeapWord* ntams = hr->next_top_at_mark_start();
1512     HeapWord* top   = hr->top();
1513 
1514     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1515 
1516     // Mark the allocated-since-marking portion...
1517     if (ntams < top) {
1518       // This definitely means the region has live objects.
1519       set_bit_for_region(hr);
1520 
1521       // Now set the bits in the card bitmap for [ntams, top)
1522       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1523       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1524 
1525       // Note: if we're looking at the last region in the heap, top
1526       // could actually be just beyond the end of the heap; end_idx
1527       // will then correspond to a (non-existent) card that is also
1528       // just beyond the heap.
1529       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1530         // end of object is not card aligned - increment to cover
1531         // all the cards spanned by the object
1532         end_idx += 1;
1533       }
1534 
1535       assert(end_idx <= _card_bm->size(),
1536              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1537              end_idx, _card_bm->size());
1538       assert(start_idx < _card_bm->size(),
1539              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1540              start_idx, _card_bm->size());
1541 
1542       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1543     }
1544 
1545     // Set the bit for the region if it contains live data
1546     if (hr->next_marked_bytes() > 0) {
1547       set_bit_for_region(hr);
1548     }
1549 
1550     return false;
1551   }
1552 };
1553 
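     // Gang task that finalizes the liveness counting data during the cleanup
     // pause by applying FinalCountDataUpdateClosure to the heap regions in parallel.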
1554 class G1ParFinalCountTask: public AbstractGangTask {
1555 protected:
1556   G1CollectedHeap* _g1h;
1557   ConcurrentMark* _cm;
1558   BitMap* _actual_region_bm;
1559   BitMap* _actual_card_bm;
1560 
1561   uint    _n_workers;
1562   HeapRegionClaimer _hrclaimer;
1563 
1564 public:
1565   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1566     : AbstractGangTask("G1 final counting"),
1567       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1568       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1569       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1570   }
1571 
1572   void work(uint worker_id) {
1573     assert(worker_id < _n_workers, "invariant");
1574 
1575     FinalCountDataUpdateClosure final_update_cl(_g1h,
1576                                                 _actual_region_bm,
1577                                                 _actual_card_bm);
1578 
1579     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1580   }
1581 };
1582 
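     // Heap region closure applied at the end of marking. It notes the end of
     // marking for each (non-archive) region, frees non-young regions that turned
     // out to contain no live data by moving them to the local cleanup list, and
     // queues remembered set cleanup work for the remaining regions.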
1583 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1584   G1CollectedHeap* _g1;
1585   size_t _freed_bytes;
1586   FreeRegionList* _local_cleanup_list;
1587   HeapRegionSetCount _old_regions_removed;
1588   HeapRegionSetCount _humongous_regions_removed;
1589   HRRSCleanupTask* _hrrs_cleanup_task;
1590 
1591 public:
1592   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1593                              FreeRegionList* local_cleanup_list,
1594                              HRRSCleanupTask* hrrs_cleanup_task) :
1595     _g1(g1),
1596     _freed_bytes(0),
1597     _local_cleanup_list(local_cleanup_list),
1598     _old_regions_removed(),
1599     _humongous_regions_removed(),
1600     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1601 
1602   size_t freed_bytes() { return _freed_bytes; }
1603   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1604   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1605 
1606   bool doHeapRegion(HeapRegion *hr) {
1607     if (hr->is_archive()) {
1608       return false;
1609     }
1610     // Regions are handed to this closure through the HeapRegionClaimer owned by
1611     // G1ParNoteEndTask, so there are no per-region claim values to manage here.
1612     _g1->reset_gc_time_stamps(hr);
1613     hr->note_end_of_marking();
1614 
1615     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1616       _freed_bytes += hr->used();
1617       hr->set_containing_set(NULL);
1618       if (hr->is_humongous()) {
1619         _humongous_regions_removed.increment(1u, hr->capacity());
1620         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1621       } else {
1622         _old_regions_removed.increment(1u, hr->capacity());
1623         _g1->free_region(hr, _local_cleanup_list, true);
1624       }
1625     } else {
1626       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1627     }
1628 
1629     return false;
1630   }
1631 };
1632 
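     // Gang task that applies G1NoteEndOfConcMarkClosure to the heap regions in
     // parallel and then merges each worker's results (freed bytes, removed region
     // counts and the local cleanup list) back into the global state.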
1633 class G1ParNoteEndTask: public AbstractGangTask {
1634   friend class G1NoteEndOfConcMarkClosure;
1635 
1636 protected:
1637   G1CollectedHeap* _g1h;
1638   FreeRegionList* _cleanup_list;
1639   HeapRegionClaimer _hrclaimer;
1640 
1641 public:
1642   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1643       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1644   }
1645 
1646   void work(uint worker_id) {
1647     FreeRegionList local_cleanup_list("Local Cleanup List");
1648     HRRSCleanupTask hrrs_cleanup_task;
1649     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1650                                            &hrrs_cleanup_task);
1651     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1652     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1653 
1654     // Now update the lists
1655     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1656     {
1657       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1658       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1659 
1660       // If we iterate over the global cleanup list at the end of
1661       // cleanup to do this printing we will not guarantee to only
1662       // generate output for the newly-reclaimed regions (the list
1663       // might not be empty at the beginning of cleanup; we might
1664       // still be working on its previous contents). So we do the
1665       // printing here, before we append the new regions to the global
1666       // cleanup list.
1667 
1668       G1HRPrinter* hr_printer = _g1h->hr_printer();
1669       if (hr_printer->is_active()) {
1670         FreeRegionListIterator iter(&local_cleanup_list);
1671         while (iter.more_available()) {
1672           HeapRegion* hr = iter.get_next();
1673           hr_printer->cleanup(hr);
1674         }
1675       }
1676 
1677       _cleanup_list->add_ordered(&local_cleanup_list);
1678       assert(local_cleanup_list.is_empty(), "post-condition");
1679 
1680       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1681     }
1682   }
1683 };
1684 
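     // Gang task that scrubs the remembered sets of the heap regions in parallel,
     // using the liveness information recorded in the region and card bitmaps.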
1685 class G1ParScrubRemSetTask: public AbstractGangTask {
1686 protected:
1687   G1RemSet* _g1rs;
1688   BitMap* _region_bm;
1689   BitMap* _card_bm;
1690   HeapRegionClaimer _hrclaimer;
1691 
1692 public:
1693   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1694       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1695   }
1696 
1697   void work(uint worker_id) {
1698     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1699   }
1700 
1701 };
1702 
1703 void ConcurrentMark::cleanup() {
1704   // world is stopped at this checkpoint
1705   assert(SafepointSynchronize::is_at_safepoint(),
1706          "world should be stopped");
1707   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1708 
1709   // If a full collection has happened, we shouldn't do this.
1710   if (has_aborted()) {
1711     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1712     return;
1713   }
1714 
1715   g1h->verify_region_sets_optional();
1716 
1717   if (VerifyDuringGC) {
1718     HandleMark hm;  // handle scope
1719     g1h->prepare_for_verify();
1720     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1721   }
1722   g1h->check_bitmaps("Cleanup Start");
1723 
1724   G1CollectorPolicy* g1p = g1h->g1_policy();
1725   g1p->record_concurrent_mark_cleanup_start();
1726 
1727   double start = os::elapsedTime();
1728 
1729   HeapRegionRemSet::reset_for_cleanup_tasks();
1730 
1731   // Do counting once more with the world stopped for good measure.
1732   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1733 
1734   g1h->workers()->run_task(&g1_par_count_task);
1735 
1736   if (VerifyDuringGC) {
1737     // Verify that the counting data accumulated during marking matches
1738     // that calculated by walking the marking bitmap.
1739 
1740     // Bitmaps to hold expected values
1741     BitMap expected_region_bm(_region_bm.size(), true);
1742     BitMap expected_card_bm(_card_bm.size(), true);
1743 
1744     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1745                                                  &_region_bm,
1746                                                  &_card_bm,
1747                                                  &expected_region_bm,
1748                                                  &expected_card_bm);
1749 
1750     g1h->workers()->run_task(&g1_par_verify_task);
1751 
1752     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1753   }
1754 
1755   size_t start_used_bytes = g1h->used();
1756   g1h->collector_state()->set_mark_in_progress(false);
1757 
1758   double count_end = os::elapsedTime();
1759   double this_final_counting_time = (count_end - start);
1760   _total_counting_time += this_final_counting_time;
1761 
1762   if (Log<LOG_TAGS(gc, liveness)>::is_trace()) {
1763     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1764     _g1h->heap_region_iterate(&cl);
1765   }
1766 
1767   // Install newly created mark bitMap as "prev".
1768   swapMarkBitMaps();
1769 
1770   g1h->reset_gc_time_stamp();
1771 
1772   uint n_workers = _g1h->workers()->active_workers();
1773 
1774   // Note end of marking in all heap regions.
1775   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1776   g1h->workers()->run_task(&g1_par_note_end_task);
1777   g1h->check_gc_time_stamps();
1778 
1779   if (!cleanup_list_is_empty()) {
1780     // The cleanup list is not empty, so we'll have to process it
1781     // concurrently. Notify anyone else that might be wanting free
1782     // regions that there will be more free regions coming soon.
1783     g1h->set_free_regions_coming();
1784   }
1785 
1786   // Rem set scrubbing must be done before the record_concurrent_mark_cleanup_end()
1787   // call below, since it affects the metric by which we sort the heap regions.
1788   if (G1ScrubRemSets) {
1789     double rs_scrub_start = os::elapsedTime();
1790     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1791     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1792 
1793     double rs_scrub_end = os::elapsedTime();
1794     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1795     _total_rs_scrub_time += this_rs_scrub_time;
1796   }
1797 
1798   // this will also free any regions totally full of garbage objects,
1799   // and sort the regions.
1800   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1801 
1802   // Statistics.
1803   double end = os::elapsedTime();
1804   _cleanup_times.add((end - start) * 1000.0);
1805 
1806   // Clean up will have freed any regions completely full of garbage.
1807   // Update the soft reference policy with the new heap occupancy.
1808   Universe::update_heap_info_at_gc();
1809 
1810   if (VerifyDuringGC) {
1811     HandleMark hm;  // handle scope
1812     g1h->prepare_for_verify();
1813     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1814   }
1815 
1816   g1h->check_bitmaps("Cleanup End");
1817 
1818   g1h->verify_region_sets_optional();
1819 
1820   // We need to make this be a "collection" so any collection pause that
1821   // races with it goes around and waits for completeCleanup to finish.
1822   g1h->increment_total_collections();
1823 
1824   // Clean out dead classes and update Metaspace sizes.
1825   if (ClassUnloadingWithConcurrentMark) {
1826     ClassLoaderDataGraph::purge();
1827   }
1828   MetaspaceGC::compute_new_size();
1829 
1830   // We reclaimed old regions so we should calculate the sizes to make
1831   // sure we update the old gen/space data.
1832   g1h->g1mm()->update_sizes();
1833   g1h->allocation_context_stats().update_after_mark();
1834 
1835   g1h->trace_heap_after_concurrent_cycle();
1836 }
1837 
1838 void ConcurrentMark::completeCleanup() {
1839   if (has_aborted()) return;
1840 
1841   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1842 
1843   _cleanup_list.verify_optional();
1844   FreeRegionList tmp_free_list("Tmp Free List");
1845 
1846   log_develop(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1847                             "cleanup list has %u entries",
1848                             _cleanup_list.length());
1849 
1850   // No one else should be accessing the _cleanup_list at this point,
1851   // so it is not necessary to take any locks
1852   while (!_cleanup_list.is_empty()) {
1853     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1854     assert(hr != NULL, "Got NULL from a non-empty list");
1855     hr->par_clear();
1856     tmp_free_list.add_ordered(hr);
1857 
1858     // Instead of adding one region at a time to the secondary_free_list,
1859     // we accumulate them in the local list and move them a few at a
1860     // time. This also cuts down on the number of notify_all() calls
1861     // we do during this process. We'll also append the local list when
1862     // _cleanup_list is empty (which means we just removed the last
1863     // region from the _cleanup_list).
1864     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1865         _cleanup_list.is_empty()) {
1866       log_develop(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1867                                 "appending %u entries to the secondary_free_list, "
1868                                 "cleanup list still has %u entries",
1869                                 tmp_free_list.length(),
1870                                 _cleanup_list.length());
1871 
1872       {
1873         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1874         g1h->secondary_free_list_add(&tmp_free_list);
1875         SecondaryFreeList_lock->notify_all();
1876       }
1877 #ifndef PRODUCT
1878       if (G1StressConcRegionFreeing) {
1879         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1880           os::sleep(Thread::current(), (jlong) 1, false);
1881         }
1882       }
1883 #endif
1884     }
1885   }
1886   assert(tmp_free_list.is_empty(), "post-condition");
1887 }
1888 
1889 // Supporting Object and Oop closures for reference discovery
1890 // and processing during marking
1891 
1892 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1893   HeapWord* addr = (HeapWord*)obj;
1894   return addr != NULL &&
1895          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1896 }
1897 
1898 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1899 // Uses the CMTask associated with a worker thread (for serial reference
1900 // processing the CMTask for worker 0 is used) to preserve (mark) and
1901 // trace referent objects.
1902 //
1903 // Using the CMTask and embedded local queues avoids having the worker
1904 // threads operating on the global mark stack. This reduces the risk
1905 // of overflowing the stack - which we would rather avoid at this late
1906 // stage. Using the tasks' local queues also removes the potential
1907 // for the workers to interfere with each other, which could occur if
1908 // they operated on the global stack.
1909 
1910 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1911   ConcurrentMark* _cm;
1912   CMTask*         _task;
1913   int             _ref_counter_limit;
1914   int             _ref_counter;
1915   bool            _is_serial;
1916  public:
1917   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1918     _cm(cm), _task(task), _is_serial(is_serial),
1919     _ref_counter_limit(G1RefProcDrainInterval) {
1920     assert(_ref_counter_limit > 0, "sanity");
1921     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1922     _ref_counter = _ref_counter_limit;
1923   }
1924 
1925   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1926   virtual void do_oop(      oop* p) { do_oop_work(p); }
1927 
1928   template <class T> void do_oop_work(T* p) {
1929     if (!_cm->has_overflown()) {
1930       oop obj = oopDesc::load_decode_heap_oop(p);
1931       _task->deal_with_reference(obj);
1932       _ref_counter--;
1933 
1934       if (_ref_counter == 0) {
1935         // We have dealt with _ref_counter_limit references, pushing them
1936         // and objects reachable from them on to the local stack (and
1937         // possibly the global stack). Call CMTask::do_marking_step() to
1938         // process these entries.
1939         //
1940         // We call CMTask::do_marking_step() in a loop, which we'll exit if
1941         // there's nothing more to do (i.e. we're done with the entries that
1942         // were pushed as a result of the CMTask::deal_with_reference() calls
1943         // above) or we overflow.
1944         //
1945         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1946         // flag while there may still be some work to do. (See the comment at
1947         // the beginning of CMTask::do_marking_step() for those conditions -
1948         // one of which is reaching the specified time target.) It is only
1949         // when CMTask::do_marking_step() returns without setting the
1950         // has_aborted() flag that the marking step has completed.
1951         do {
1952           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1953           _task->do_marking_step(mark_step_duration_ms,
1954                                  false      /* do_termination */,
1955                                  _is_serial);
1956         } while (_task->has_aborted() && !_cm->has_overflown());
1957         _ref_counter = _ref_counter_limit;
1958       }
1959     }
1960   }
1961 };
1962 
1963 // 'Drain' oop closure used by both serial and parallel reference processing.
1964 // Uses the CMTask associated with a given worker thread (for serial
1965 // reference processing the CMTask for worker 0 is used). Calls the
1966 // do_marking_step routine, with an unbelievably large timeout value,
1967 // to drain the marking data structures of the remaining entries
1968 // added by the 'keep alive' oop closure above.
1969 
1970 class G1CMDrainMarkingStackClosure: public VoidClosure {
1971   ConcurrentMark* _cm;
1972   CMTask*         _task;
1973   bool            _is_serial;
1974  public:
1975   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1976     _cm(cm), _task(task), _is_serial(is_serial) {
1977     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1978   }
1979 
1980   void do_void() {
1981     do {
1982       // We call CMTask::do_marking_step() to completely drain the local
1983       // and global marking stacks of entries pushed by the 'keep alive'
1984       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1985       //
1986       // CMTask::do_marking_step() is called in a loop, which we'll exit
1987       // if there's nothing more to do (i.e. we've completely drained the
1988       // entries that were pushed as a result of applying the 'keep alive'
1989       // closure to the entries on the discovered ref lists) or we overflow
1990       // the global marking stack.
1991       //
1992       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1993       // flag while there may still be some work to do. (See the comment at
1994       // the beginning of CMTask::do_marking_step() for those conditions -
1995       // one of which is reaching the specified time target.) It is only
1996       // when CMTask::do_marking_step() returns without setting the
1997       // has_aborted() flag that the marking step has completed.
1998 
1999       _task->do_marking_step(1000000000.0 /* something very large */,
2000                              true         /* do_termination */,
2001                              _is_serial);
2002     } while (_task->has_aborted() && !_cm->has_overflown());
2003   }
2004 };
2005 
2006 // Implementation of AbstractRefProcTaskExecutor for parallel
2007 // reference processing at the end of G1 concurrent marking
2008 
2009 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2010 private:
2011   G1CollectedHeap* _g1h;
2012   ConcurrentMark*  _cm;
2013   WorkGang*        _workers;
2014   uint             _active_workers;
2015 
2016 public:
2017   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2018                           ConcurrentMark* cm,
2019                           WorkGang* workers,
2020                           uint n_workers) :
2021     _g1h(g1h), _cm(cm),
2022     _workers(workers), _active_workers(n_workers) { }
2023 
2024   // Executes the given task using concurrent marking worker threads.
2025   virtual void execute(ProcessTask& task);
2026   virtual void execute(EnqueueTask& task);
2027 };
2028 
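     // Gang task proxy for parallel reference processing: each worker runs the
     // given ProcessTask with 'is alive', 'keep alive' and 'drain marking stack'
     // closures built around that worker's CMTask.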
2029 class G1CMRefProcTaskProxy: public AbstractGangTask {
2030   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2031   ProcessTask&     _proc_task;
2032   G1CollectedHeap* _g1h;
2033   ConcurrentMark*  _cm;
2034 
2035 public:
2036   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2037                      G1CollectedHeap* g1h,
2038                      ConcurrentMark* cm) :
2039     AbstractGangTask("Process reference objects in parallel"),
2040     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2041     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2042     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2043   }
2044 
2045   virtual void work(uint worker_id) {
2046     ResourceMark rm;
2047     HandleMark hm;
2048     CMTask* task = _cm->task(worker_id);
2049     G1CMIsAliveClosure g1_is_alive(_g1h);
2050     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2051     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2052 
2053     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2054   }
2055 };
2056 
2057 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2058   assert(_workers != NULL, "Need parallel worker threads.");
2059   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2060 
2061   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2062 
2063   // We need to reset the concurrency level before each
2064   // proxy task execution, so that the termination protocol
2065   // and overflow handling in CMTask::do_marking_step() know
2066   // how many workers to wait for.
2067   _cm->set_concurrency(_active_workers);
2068   _workers->run_task(&proc_task_proxy);
2069 }
2070 
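     // Gang task proxy that runs the reference processor's EnqueueTask so that
     // discovered references can be enqueued in parallel by the worker gang.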
2071 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2072   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2073   EnqueueTask& _enq_task;
2074 
2075 public:
2076   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2077     AbstractGangTask("Enqueue reference objects in parallel"),
2078     _enq_task(enq_task) { }
2079 
2080   virtual void work(uint worker_id) {
2081     _enq_task.work(worker_id);
2082   }
2083 };
2084 
2085 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2086   assert(_workers != NULL, "Need parallel worker threads.");
2087   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2088 
2089   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2090 
2091   // Not strictly necessary but...
2092   //
2093   // We need to reset the concurrency level before each
2094   // proxy task execution, so that the termination protocol
2095   // and overflow handling in CMTask::do_marking_step() know
2096   // how many workers to wait for.
2097   _cm->set_concurrency(_active_workers);
2098   _workers->run_task(&enq_task_proxy);
2099 }
2100 
2101 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2102   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2103 }
2104 
2105 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2106   if (has_overflown()) {
2107     // Skip processing the discovered references if we have
2108     // overflown the global marking stack. Reference objects
2109     // only get discovered once so it is OK to not
2110     // de-populate the discovered reference lists. We could have,
2111     // but the only benefit would be that, when marking restarts,
2112     // fewer reference objects are discovered.
2113     return;
2114   }
2115 
2116   ResourceMark rm;
2117   HandleMark   hm;
2118 
2119   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2120 
2121   // Is alive closure.
2122   G1CMIsAliveClosure g1_is_alive(g1h);
2123 
2124   // Inner scope to exclude the cleaning of the string and symbol
2125   // tables from the displayed time.
2126   {
2127     GCTraceTime(Debug, gc) trace("GC ref-proc", g1h->gc_timer_cm());
2128 
2129     ReferenceProcessor* rp = g1h->ref_processor_cm();
2130 
2131     // See the comment in G1CollectedHeap::ref_processing_init()
2132     // about how reference processing currently works in G1.
2133 
2134     // Set the soft reference policy
2135     rp->setup_policy(clear_all_soft_refs);
2136     assert(_markStack.isEmpty(), "mark stack should be empty");
2137 
2138     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2139     // in serial reference processing. Note these closures are also
2140     // used for serially processing (by the current thread) the
2141     // JNI references during parallel reference processing.
2142     //
2143     // These closures do not need to synchronize with the worker
2144     // threads involved in parallel reference processing as these
2145     // instances are executed serially by the current thread (e.g.
2146     // reference processing is not multi-threaded and is thus
2147     // performed by the current thread instead of a gang worker).
2148     //
2149     // The gang tasks involved in parallel reference processing create
2150     // their own instances of these closures, which do their own
2151     // synchronization among themselves.
2152     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2153     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2154 
2155     // We need at least one active thread. If reference processing
2156     // is not multi-threaded we use the current (VMThread) thread,
2157     // otherwise we use the work gang from the G1CollectedHeap and
2158     // we utilize all the worker threads we can.
2159     bool processing_is_mt = rp->processing_is_mt();
2160     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2161     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2162 
2163     // Parallel processing task executor.
2164     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2165                                               g1h->workers(), active_workers);
2166     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2167 
2168     // Set the concurrency level. The phase was already set prior to
2169     // executing the remark task.
2170     set_concurrency(active_workers);
2171 
2172     // Set the degree of MT processing here.  If the discovery was done MT,
2173     // the number of threads involved during discovery could differ from
2174     // the number of active workers.  This is OK as long as the discovered
2175     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2176     rp->set_active_mt_degree(active_workers);
2177 
2178     // Process the weak references.
2179     const ReferenceProcessorStats& stats =
2180         rp->process_discovered_references(&g1_is_alive,
2181                                           &g1_keep_alive,
2182                                           &g1_drain_mark_stack,
2183                                           executor,
2184                                           g1h->gc_timer_cm());
2185     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2186 
2187     // The do_oop work routines of the keep_alive and drain_marking_stack
2188     // oop closures will set the has_overflown flag if we overflow the
2189     // global marking stack.
2190 
2191     assert(_markStack.overflow() || _markStack.isEmpty(),
2192             "mark stack should be empty (unless it overflowed)");
2193 
2194     if (_markStack.overflow()) {
2195       // This should have been done already when we tried to push an
2196       // entry on to the global mark stack. But let's do it again.
2197       set_has_overflown();
2198     }
2199 
2200     assert(rp->num_q() == active_workers, "why not");
2201 
2202     rp->enqueue_discovered_references(executor);
2203 
2204     rp->verify_no_references_recorded();
2205     assert(!rp->discovery_enabled(), "Post condition");
2206   }
2207 
2208   if (has_overflown()) {
2209     // We can not trust g1_is_alive if the marking stack overflowed
2210     return;
2211   }
2212 
2213   assert(_markStack.isEmpty(), "Marking should have completed");
2214 
2215   // Unload Klasses, String, Symbols, Code Cache, etc.
2216   {
2217     GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm());
2218 
2219     if (ClassUnloadingWithConcurrentMark) {
2220       bool purged_classes;
2221 
2222       {
2223         GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm());
2224         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2225       }
2226 
2227       {
2228         GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm());
2229         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2230       }
2231     }
2232 
2233     if (G1StringDedup::is_enabled()) {
2234       GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm());
2235       G1StringDedup::unlink(&g1_is_alive);
2236     }
2237   }
2238 }
2239 
2240 void ConcurrentMark::swapMarkBitMaps() {
2241   CMBitMapRO* temp = _prevMarkBitMap;
2242   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2243   _nextMarkBitMap  = (CMBitMap*)  temp;
2244 }
2245 
2246 // Closure for marking entries in SATB buffers.
2247 class CMSATBBufferClosure : public SATBBufferClosure {
2248 private:
2249   CMTask* _task;
2250   G1CollectedHeap* _g1h;
2251 
2252   // This is very similar to CMTask::deal_with_reference, but with
2253   // more relaxed requirements for the argument, so this must be more
2254   // circumspect about treating the argument as an object.
2255   void do_entry(void* entry) const {
2256     _task->increment_refs_reached();
2257     HeapRegion* hr = _g1h->heap_region_containing(entry);
2258     if (entry < hr->next_top_at_mark_start()) {
2259       // Until we get here, we don't know whether entry refers to a valid
2260       // object; it could instead have been a stale reference.
2261       oop obj = static_cast<oop>(entry);
2262       assert(obj->is_oop(true /* ignore mark word */),
2263              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2264       _task->make_reference_grey(obj, hr);
2265     }
2266   }
2267 
2268 public:
2269   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2270     : _task(task), _g1h(g1h) { }
2271 
2272   virtual void do_buffer(void** buffer, size_t size) {
2273     for (size_t i = 0; i < size; ++i) {
2274       do_entry(buffer[i]);
2275     }
2276   }
2277 };
2278 
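     // Thread closure used during remark. For each Java thread it claims, it scans
     // the thread's nmethods and applies the SATB buffer closure to the thread's
     // SATB queue; for the VM thread it processes the shared SATB queue.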
2279 class G1RemarkThreadsClosure : public ThreadClosure {
2280   CMSATBBufferClosure _cm_satb_cl;
2281   G1CMOopClosure _cm_cl;
2282   MarkingCodeBlobClosure _code_cl;
2283   int _thread_parity;
2284 
2285  public:
2286   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2287     _cm_satb_cl(task, g1h),
2288     _cm_cl(g1h, g1h->concurrent_mark(), task),
2289     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2290     _thread_parity(Threads::thread_claim_parity()) {}
2291 
2292   void do_thread(Thread* thread) {
2293     if (thread->is_Java_thread()) {
2294       if (thread->claim_oops_do(true, _thread_parity)) {
2295         JavaThread* jt = (JavaThread*)thread;
2296 
2297         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2298         // however, oops reachable from nmethods have very complex lifecycles:
2299         // * Alive if on the stack of an executing method
2300         // * Weakly reachable otherwise
2301         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2302         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2303         jt->nmethods_do(&_code_cl);
2304 
2305         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2306       }
2307     } else if (thread->is_VM_thread()) {
2308       if (thread->claim_oops_do(true, _thread_parity)) {
2309         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2310       }
2311     }
2312   }
2313 };
2314 
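     // Parallel remark task. Each worker whose id is below the number of active
     // tasks first scans the mutator threads (SATB buffers and nmethod roots) and
     // then calls do_marking_step() repeatedly until marking completes or the
     // global mark stack overflows.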
2315 class CMRemarkTask: public AbstractGangTask {
2316 private:
2317   ConcurrentMark* _cm;
2318 public:
2319   void work(uint worker_id) {
2320     // Since all available tasks are actually started, we should
2321     // only proceed if we're supposed to be active.
2322     if (worker_id < _cm->active_tasks()) {
2323       CMTask* task = _cm->task(worker_id);
2324       task->record_start_time();
2325       {
2326         ResourceMark rm;
2327         HandleMark hm;
2328 
2329         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2330         Threads::threads_do(&threads_f);
2331       }
2332 
2333       do {
2334         task->do_marking_step(1000000000.0 /* something very large */,
2335                               true         /* do_termination       */,
2336                               false        /* is_serial            */);
2337       } while (task->has_aborted() && !_cm->has_overflown());
2338       // If we overflow, then we do not want to restart. We instead
2339       // want to abort remark and do concurrent marking again.
2340       task->record_end_time();
2341     }
2342   }
2343 
2344   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2345     AbstractGangTask("Par Remark"), _cm(cm) {
2346     _cm->terminator()->reset_for_reuse(active_workers);
2347   }
2348 };
2349 
2350 void ConcurrentMark::checkpointRootsFinalWork() {
2351   ResourceMark rm;
2352   HandleMark   hm;
2353   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2354 
2355   GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
2356 
2357   g1h->ensure_parsability(false);
2358 
2359   // this is remark, so we'll use up all active threads
2360   uint active_workers = g1h->workers()->active_workers();
2361   set_concurrency_and_phase(active_workers, false /* concurrent */);
2362   // Leave _parallel_marking_threads at its
2363   // value originally calculated in the ConcurrentMark
2364   // constructor and pass values of the active workers
2365   // through the gang in the task.
2366 
2367   {
2368     StrongRootsScope srs(active_workers);
2369 
2370     CMRemarkTask remarkTask(this, active_workers);
2371     // We will start all available threads, even if we decide that the
2372     // active_workers will be fewer. The extra ones will just bail out
2373     // immediately.
2374     g1h->workers()->run_task(&remarkTask);
2375   }
2376 
2377   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2378   guarantee(has_overflown() ||
2379             satb_mq_set.completed_buffers_num() == 0,
2380             "Invariant: has_overflown = %s, num buffers = %d",
2381             BOOL_TO_STR(has_overflown()),
2382             satb_mq_set.completed_buffers_num());
2383 
2384   print_stats();
2385 }
2386 
2387 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2388   // Note we are overriding the read-only view of the prev map here, via
2389   // the cast.
2390   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2391 }
2392 
2393 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2394   _nextMarkBitMap->clearRange(mr);
2395 }
2396 
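     // Claims the next region to be scanned by atomically advancing the global
     // finger past it. Returns NULL when the finger has reached the end of the
     // heap, or when the claimed region turned out to be empty (in which case
     // the caller is expected to call claim_region() again).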
2397 HeapRegion*
2398 ConcurrentMark::claim_region(uint worker_id) {
2399   // "checkpoint" the finger
2400   HeapWord* finger = _finger;
2401 
2402   // _heap_end will not change underneath our feet; it only changes at
2403   // yield points.
2404   while (finger < _heap_end) {
2405     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2406 
2407     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2408 
2409     // The above heap_region_containing() may return NULL as we always scan and claim
2410     // up to the end of the heap. In this case, just jump to the next region.
2411     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2412 
2413     // Is the gap between reading the finger and doing the CAS too long?
2414     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2415     if (res == finger && curr_region != NULL) {
2416       // we succeeded
2417       HeapWord*   bottom        = curr_region->bottom();
2418       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2419 
2420       // Notice that _finger == end cannot be guaranteed here since
2421       // someone else might have moved the finger even further.
2422       assert(_finger >= end, "the finger should have moved forward");
2423 
2424       if (limit > bottom) {
2425         return curr_region;
2426       } else {
2427         assert(limit == bottom,
2428                "the region limit should be at bottom");
2429         // we return NULL and the caller should try calling
2430         // claim_region() again.
2431         return NULL;
2432       }
2433     } else {
2434       assert(_finger > finger, "the finger should have moved forward");
2435       // read it again
2436       finger = _finger;
2437     }
2438   }
2439 
2440   return NULL;
2441 }
2442 
2443 #ifndef PRODUCT
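     // Functor used in non-product builds to verify that entries on the global
     // mark stack and the task queues are valid oops that are not in the
     // collection set.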
2444 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2445 private:
2446   G1CollectedHeap* _g1h;
2447   const char* _phase;
2448   int _info;
2449 
2450 public:
2451   VerifyNoCSetOops(const char* phase, int info = -1) :
2452     _g1h(G1CollectedHeap::heap()),
2453     _phase(phase),
2454     _info(info)
2455   { }
2456 
2457   void operator()(oop obj) const {
2458     guarantee(obj->is_oop(),
2459               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2460               p2i(obj), _phase, _info);
2461     guarantee(!_g1h->obj_in_cs(obj),
2462               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2463               p2i(obj), _phase, _info);
2464   }
2465 };
2466 
2467 void ConcurrentMark::verify_no_cset_oops() {
2468   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2469   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2470     return;
2471   }
2472 
2473   // Verify entries on the global mark stack
2474   _markStack.iterate(VerifyNoCSetOops("Stack"));
2475 
2476   // Verify entries on the task queues
2477   for (uint i = 0; i < _max_worker_id; ++i) {
2478     CMTaskQueue* queue = _task_queues->queue(i);
2479     queue->iterate(VerifyNoCSetOops("Queue", i));
2480   }
2481 
2482   // Verify the global finger
2483   HeapWord* global_finger = finger();
2484   if (global_finger != NULL && global_finger < _heap_end) {
2485     // Since we always iterate over all regions, we might get a NULL HeapRegion
2486     // here.
2487     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2488     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2489               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2490               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2491   }
2492 
2493   // Verify the task fingers
2494   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2495   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2496     CMTask* task = _tasks[i];
2497     HeapWord* task_finger = task->finger();
2498     if (task_finger != NULL && task_finger < _heap_end) {
2499       // See above note on the global finger verification.
2500       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2501       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2502                 !task_hr->in_collection_set(),
2503                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2504                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2505     }
2506   }
2507 }
2508 #endif // PRODUCT
2509 
2510 // Aggregate the counting data that was constructed concurrently
2511 // with marking.
2512 class AggregateCountDataHRClosure: public HeapRegionClosure {
2513   G1CollectedHeap* _g1h;
2514   ConcurrentMark* _cm;
2515   CardTableModRefBS* _ct_bs;
2516   BitMap* _cm_card_bm;
2517   uint _max_worker_id;
2518 
2519  public:
2520   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2521                               BitMap* cm_card_bm,
2522                               uint max_worker_id) :
2523     _g1h(g1h), _cm(g1h->concurrent_mark()),
2524     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2525     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2526 
2527   bool doHeapRegion(HeapRegion* hr) {
2528     HeapWord* start = hr->bottom();
2529     HeapWord* limit = hr->next_top_at_mark_start();
2530     HeapWord* end = hr->end();
2531 
2532     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2533            "Preconditions not met - "
2534            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2535            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2536            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2537 
2538     assert(hr->next_marked_bytes() == 0, "Precondition");
2539 
2540     if (start == limit) {
2541       // NTAMS of this region has not been set so nothing to do.
2542       return false;
2543     }
2544 
2545     // 'start' should be in the heap.
2546     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2547     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2548     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2549 
2550     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2551     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2552     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2553 
2554     // If ntams is not card aligned then we bump the card bitmap index
2555     // for limit so that we get all the cards spanned by
2556     // the object ending at ntams.
2557     // Note: if this is the last region in the heap then ntams
2558     // could actually be just beyond the end of the heap;
2559     // limit_idx will then correspond to a (non-existent) card
2560     // that is also outside the heap.
2561     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2562       limit_idx += 1;
2563     }
2564 
2565     assert(limit_idx <= end_idx, "or else use atomics");
2566 
2567     // Aggregate the "stripe" in the count data associated with hr.
2568     uint hrm_index = hr->hrm_index();
2569     size_t marked_bytes = 0;
2570 
2571     for (uint i = 0; i < _max_worker_id; i += 1) {
2572       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2573       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2574 
2575       // Fetch the marked_bytes in this region for task i and
2576       // add it to the running total for this region.
2577       marked_bytes += marked_bytes_array[hrm_index];
2578 
2579       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2580       // into the global card bitmap.
2581       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2582 
2583       while (scan_idx < limit_idx) {
2584         assert(task_card_bm->at(scan_idx) == true, "should be");
2585         _cm_card_bm->set_bit(scan_idx);
2586         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2587 
2588         // BitMap::get_next_one_offset() can handle the case when
2589         // its left_offset parameter is greater than its right_offset
2590         // parameter. It does, however, have an early exit if
2591         // left_offset == right_offset. So let's limit the value
2592         // passed in for left offset here.
2593         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2594         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2595       }
2596     }
2597 
2598     // Update the marked bytes for this region.
2599     hr->add_to_marked_bytes(marked_bytes);
2600 
2601     // Next heap region
2602     return false;
2603   }
2604 };
2605 
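     // Gang task that applies AggregateCountDataHRClosure to the heap regions in
     // parallel, merging the per-worker counting data into the global card bitmap
     // and the per-region marked-bytes totals.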
2606 class G1AggregateCountDataTask: public AbstractGangTask {
2607 protected:
2608   G1CollectedHeap* _g1h;
2609   ConcurrentMark* _cm;
2610   BitMap* _cm_card_bm;
2611   uint _max_worker_id;
2612   uint _active_workers;
2613   HeapRegionClaimer _hrclaimer;
2614 
2615 public:
2616   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2617                            ConcurrentMark* cm,
2618                            BitMap* cm_card_bm,
2619                            uint max_worker_id,
2620                            uint n_workers) :
2621       AbstractGangTask("Count Aggregation"),
2622       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2623       _max_worker_id(max_worker_id),
2624       _active_workers(n_workers),
2625       _hrclaimer(_active_workers) {
2626   }
2627 
2628   void work(uint worker_id) {
2629     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2630 
2631     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2632   }
2633 };
2634 
2635 
2636 void ConcurrentMark::aggregate_count_data() {
2637   uint n_workers = _g1h->workers()->active_workers();
2638 
2639   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2640                                            _max_worker_id, n_workers);
2641 
2642   _g1h->workers()->run_task(&g1_par_agg_task);
2643 }
2644 
2645 // Clear the per-worker arrays used to store the per-region counting data
2646 void ConcurrentMark::clear_all_count_data() {
2647   // Clear the global card bitmap - it will be filled during
2648   // liveness count aggregation (during remark) and the
2649   // final counting task.
2650   _card_bm.clear();
2651 
2652   // Clear the global region bitmap - it will be filled as part
2653   // of the final counting task.
2654   _region_bm.clear();
2655 
2656   uint max_regions = _g1h->max_regions();
2657   assert(_max_worker_id > 0, "uninitialized");
2658 
2659   for (uint i = 0; i < _max_worker_id; i += 1) {
2660     BitMap* task_card_bm = count_card_bitmap_for(i);
2661     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2662 
2663     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2664     assert(marked_bytes_array != NULL, "uninitialized");
2665 
2666     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2667     task_card_bm->clear();
2668   }
2669 }
2670 
2671 void ConcurrentMark::print_stats() {
2672   LogHandle(gc, stats) log;
2673   if (!log.is_debug()) {
2674     return;
2675   }
2676   log.debug("---------------------------------------------------------------------");
2677   for (size_t i = 0; i < _active_tasks; ++i) {
2678     _tasks[i]->print_stats();
2679     log.debug("---------------------------------------------------------------------");
2680   }
2681 }
2682 
2683 // abandon current marking iteration due to a Full GC
2684 void ConcurrentMark::abort() {
2685   if (!cmThread()->during_cycle() || _has_aborted) {
2686     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2687     return;
2688   }
2689 
2690   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2691   // concurrent bitmap clearing.
2692   _nextMarkBitMap->clearAll();
2693 
2694   // Note we cannot clear the previous marking bitmap here
2695   // since VerifyDuringGC verifies the objects marked during
2696   // a full GC against the previous bitmap.
2697 
2698   // Clear the liveness counting data
2699   clear_all_count_data();
2700   // Empty mark stack
2701   reset_marking_state();
2702   for (uint i = 0; i < _max_worker_id; ++i) {
2703     _tasks[i]->clear_region_fields();
2704   }
2705   _first_overflow_barrier_sync.abort();
2706   _second_overflow_barrier_sync.abort();
2707   _has_aborted = true;
2708 
2709   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2710   satb_mq_set.abandon_partial_marking();
2711   // This can be called either during or outside marking, we'll read
2712   // the expected_active value from the SATB queue set.
2713   satb_mq_set.set_active_all_threads(
2714                                  false, /* new active value */
2715                                  satb_mq_set.is_active() /* expected_active */);
2716 
2717   _g1h->trace_heap_after_concurrent_cycle();
2718   _g1h->register_concurrent_cycle_end();
2719 }
2720 
2721 static void print_ms_time_info(const char* prefix, const char* name,
2722                                NumberSeq& ns) {
2723   log_trace(gc, marking, stats, exit)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2724                                       prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2725   if (ns.num() > 0) {
2726     log_trace(gc, marking, stats, exit)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2727                                         prefix, ns.sd(), ns.maximum());
2728   }
2729 }
2730 
2731 void ConcurrentMark::print_summary_info() {
2732   LogHandle(gc, marking, stats, exit) log;
2733   if (!log.is_trace()) {
2734     return;
2735   }
2736 
2737   log.trace(" Concurrent marking:");
2738   print_ms_time_info("  ", "init marks", _init_times);
2739   print_ms_time_info("  ", "remarks", _remark_times);
2740   {
2741     print_ms_time_info("     ", "final marks", _remark_mark_times);
2742     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2743 
2744   }
2745   print_ms_time_info("  ", "cleanups", _cleanup_times);
2746   log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2747             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2748   if (G1ScrubRemSets) {
2749     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2750               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2751   }
2752   log.trace("  Total stop_world time = %8.2f s.",
2753             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2754   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2755             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2756 }
2757 
2758 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2759   _parallel_workers->print_worker_threads_on(st);
2760 }
2761 
2762 void ConcurrentMark::print_on_error(outputStream* st) const {
2763   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2764       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2765   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2766   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2767 }
2768 
2769 // We take a break if someone is trying to stop the world.
2770 bool ConcurrentMark::do_yield_check(uint worker_id) {
2771   if (SuspendibleThreadSet::should_yield()) {
2772     if (worker_id == 0) {
2773       _g1h->g1_policy()->record_concurrent_pause();
2774     }
2775     SuspendibleThreadSet::yield();
2776     return true;
2777   } else {
2778     return false;
2779   }
2780 }
2781 
2782 // Closure for iteration over bitmaps
2783 class CMBitMapClosure : public BitMapClosure {
2784 private:
2785   // the bitmap that is being iterated over
2786   CMBitMap*                   _nextMarkBitMap;
2787   ConcurrentMark*             _cm;
2788   CMTask*                     _task;
2789 
2790 public:
2791   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2792     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
2793 
2794   bool do_bit(size_t offset) {
2795     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2796     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2797     assert(addr < _cm->finger(), "invariant");
2798     assert(addr >= _task->finger(), "invariant");
2799 
2800     // We move that task's local finger along.
2801     _task->move_finger_to(addr);
2802 
2803     _task->scan_object(oop(addr));
2804     // we only partially drain the local queue and global stack
2805     _task->drain_local_queue(true);
2806     _task->drain_global_stack(true);
2807 
2808     // if the has_aborted flag has been raised, we need to bail out of
2809     // the iteration
2810     return !_task->has_aborted();
2811   }
2812 };
2813 
2814 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2815   ReferenceProcessor* result = NULL;
2816   if (G1UseConcMarkReferenceProcessing) {
2817     result = g1h->ref_processor_cm();
2818     assert(result != NULL, "should not be NULL");
2819   }
2820   return result;
2821 }
2822 
2823 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2824                                ConcurrentMark* cm,
2825                                CMTask* task)
2826   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2827     _g1h(g1h), _cm(cm), _task(task)
2828 { }
2829 
2830 void CMTask::setup_for_region(HeapRegion* hr) {
2831   assert(hr != NULL,
2832         "claim_region() should have filtered out NULL regions");
2833   _curr_region  = hr;
2834   _finger       = hr->bottom();
2835   update_region_limit();
2836 }
2837 
2838 void CMTask::update_region_limit() {
2839   HeapRegion* hr            = _curr_region;
2840   HeapWord* bottom          = hr->bottom();
2841   HeapWord* limit           = hr->next_top_at_mark_start();
2842 
2843   if (limit == bottom) {
2844     // The region was collected underneath our feet.
2845     // We set the finger to bottom to ensure that the bitmap
2846     // iteration that will follow this will not do anything.
2847     // (this is not a condition that holds when we set the region up,
2848     // as the region is not supposed to be empty in the first place)
2849     _finger = bottom;
2850   } else if (limit >= _region_limit) {
2851     assert(limit >= _finger, "peace of mind");
2852   } else {
2853     assert(limit < _region_limit, "only way to get here");
2854     // This can happen under some pretty unusual circumstances.  An
2855     // evacuation pause empties the region underneath our feet (NTAMS
2856     // at bottom). We then do some allocation in the region (NTAMS
2857     // stays at bottom), followed by the region being used as a GC
2858     // alloc region (NTAMS will move to top() and the objects
2859     // originally below it will be grayed). All objects now marked in
2860     // the region are explicitly grayed, if below the global finger,
2861     // and in fact we do not need to scan anything else. So, we simply
2862     // set _finger to be limit to ensure that the bitmap iteration
2863     // doesn't do anything.
2864     _finger = limit;
2865   }
2866 
2867   _region_limit = limit;
2868 }
2869 
2870 void CMTask::giveup_current_region() {
2871   assert(_curr_region != NULL, "invariant");
2872   clear_region_fields();
2873 }
2874 
2875 void CMTask::clear_region_fields() {
2876   // Values for these three fields that indicate that we're not
2877   // holding on to a region.
2878   _curr_region   = NULL;
2879   _finger        = NULL;
2880   _region_limit  = NULL;
2881 }
2882 
2883 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2884   if (cm_oop_closure == NULL) {
2885     assert(_cm_oop_closure != NULL, "invariant");
2886   } else {
2887     assert(_cm_oop_closure == NULL, "invariant");
2888   }
2889   _cm_oop_closure = cm_oop_closure;
2890 }
2891 
2892 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2893   guarantee(nextMarkBitMap != NULL, "invariant");
2894   _nextMarkBitMap                = nextMarkBitMap;
2895   clear_region_fields();
2896 
2897   _calls                         = 0;
2898   _elapsed_time_ms               = 0.0;
2899   _termination_time_ms           = 0.0;
2900   _termination_start_time_ms     = 0.0;
2901 }
2902 
2903 bool CMTask::should_exit_termination() {
2904   regular_clock_call();
2905   // This is called when we are in the termination protocol. We should
2906   // quit if, for some reason, this task wants to abort or the global
2907   // stack is not empty (this means that we can get work from it).
2908   return !_cm->mark_stack_empty() || has_aborted();
2909 }
2910 
2911 void CMTask::reached_limit() {
2912   assert(_words_scanned >= _words_scanned_limit ||
2913          _refs_reached >= _refs_reached_limit,
2914          "shouldn't have been called otherwise");
2915   regular_clock_call();
2916 }
2917 
2918 void CMTask::regular_clock_call() {
2919   if (has_aborted()) return;
2920 
2921   // First, we need to recalculate the words scanned and refs reached
2922   // limits for the next clock call.
2923   recalculate_limits();
2924 
2925   // During the regular clock call we do the following:
2926 
2927   // (1) If an overflow has been flagged, then we abort.
2928   if (_cm->has_overflown()) {
2929     set_has_aborted();
2930     return;
2931   }
2932 
2933   // If we are not concurrent (i.e. we're doing remark), we don't need
2934   // to check anything else. The other steps are only needed during
2935   // the concurrent marking phase.
2936   if (!concurrent()) return;
2937 
2938   // (2) If marking has been aborted for Full GC, then we also abort.
2939   if (_cm->has_aborted()) {
2940     set_has_aborted();
2941     return;
2942   }
2943 
2944   double curr_time_ms = os::elapsedVTime() * 1000.0;
2945 
2946   // (3) We check whether we should yield. If we have to, then we abort.
2947   if (SuspendibleThreadSet::should_yield()) {
2948     // We should yield. To do this we abort the task. The caller is
2949     // responsible for yielding.
2950     set_has_aborted();
2951     return;
2952   }
2953 
2954   // (4) We check whether we've reached our time quota. If we have,
2955   // then we abort.
2956   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2957   if (elapsed_time_ms > _time_target_ms) {
2958     set_has_aborted();
2959     _has_timed_out = true;
2960     return;
2961   }
2962 
2963   // (5) Finally, we check whether there are enough completed SATB
2964   // buffers available for processing. If there are, we abort.
2965   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2966   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2967     // We do need to process SATB buffers, so we'll abort and restart
2968     // the marking task to do so.
2969     set_has_aborted();
2970     return;
2971   }
2972 }
2973 
2974 void CMTask::recalculate_limits() {
2975   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2976   _words_scanned_limit      = _real_words_scanned_limit;
2977 
2978   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2979   _refs_reached_limit       = _real_refs_reached_limit;
2980 }
2981 
2982 void CMTask::decrease_limits() {
2983   // This is called when we believe that we're going to do an infrequent
2984   // operation which will increase the per byte scanned cost (i.e. move
2985   // entries to/from the global stack). It basically tries to decrease the
2986   // scanning limit so that the clock is called earlier.
2987 
2988   _words_scanned_limit = _real_words_scanned_limit -
2989     3 * words_scanned_period / 4;
2990   _refs_reached_limit  = _real_refs_reached_limit -
2991     3 * refs_reached_period / 4;
2992 }
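
/*
 * The three methods above implement the work-based clock scheme that the
 * do_marking_step() comment further down describes. A minimal,
 * self-contained sketch of the idea follows; it is illustrative only (the
 * WorkClock name, the 12 * 1024 period value and the empty clock body are
 * assumptions for this sketch, not HotSpot code):
 *
 *   #include <cstddef>
 *
 *   class WorkClock {
 *     static const size_t words_scanned_period = 12 * 1024;
 *     size_t _words_scanned;
 *     size_t _real_words_scanned_limit;
 *     size_t _words_scanned_limit;
 *
 *     void regular_clock_call() {
 *       // The real clock checks overflow, Full GC aborts, pending
 *       // yields, the time quota and completed SATB buffers here.
 *       recalculate_limits();
 *     }
 *     void recalculate_limits() {
 *       _real_words_scanned_limit = _words_scanned + words_scanned_period;
 *       _words_scanned_limit      = _real_words_scanned_limit;
 *     }
 *
 *   public:
 *     WorkClock() : _words_scanned(0) { recalculate_limits(); }
 *
 *     // Called for every object word scanned; the clock only fires once
 *     // the accumulated work crosses the current limit.
 *     void note_words_scanned(size_t n) {
 *       _words_scanned += n;
 *       if (_words_scanned >= _words_scanned_limit) {
 *         regular_clock_call();
 *       }
 *     }
 *
 *     // Pull the limit in by 3/4 of a period after an expensive operation
 *     // (e.g. a global stack transfer) so that the clock fires earlier.
 *     void decrease_limits() {
 *       _words_scanned_limit = _real_words_scanned_limit -
 *         3 * words_scanned_period / 4;
 *     }
 *   };
 */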
2993 
2994 void CMTask::move_entries_to_global_stack() {
2995   // local array where we'll store the entries that will be popped
2996   // from the local queue
2997   oop buffer[global_stack_transfer_size];
2998 
2999   int n = 0;
3000   oop obj;
3001   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3002     buffer[n] = obj;
3003     ++n;
3004   }
3005 
3006   if (n > 0) {
3007     // we popped at least one entry from the local queue
3008 
3009     if (!_cm->mark_stack_push(buffer, n)) {
3010       set_has_aborted();
3011     }
3012   }
3013 
3014   // this operation was quite expensive, so decrease the limits
3015   decrease_limits();
3016 }
3017 
3018 void CMTask::get_entries_from_global_stack() {
3019   // local array where we'll store the entries that will be popped
3020   // from the global stack.
3021   oop buffer[global_stack_transfer_size];
3022   int n;
3023   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3024   assert(n <= global_stack_transfer_size,
3025          "we should not pop more than the given limit");
3026   if (n > 0) {
3027     // yes, we did actually pop at least one entry
3028     for (int i = 0; i < n; ++i) {
3029       bool success = _task_queue->push(buffer[i]);
3030       // We only call this when the local queue is empty or under a
3031       // given target limit. So, we do not expect this push to fail.
3032       assert(success, "invariant");
3033     }
3034   }
3035 
3036   // this operation was quite expensive, so decrease the limits
3037   decrease_limits();
3038 }
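
/*
 * move_entries_to_global_stack() and get_entries_from_global_stack() move
 * work in fixed-size chunks rather than one entry at a time, so that the
 * shared, mutex-protected mark stack is touched far less often than the
 * local queue. A rough, self-contained sketch of the transfer (kChunkSize
 * and the std:: containers are illustrative stand-ins, not the real
 * CMTaskQueue/CMMarkStack types):
 *
 *   #include <deque>
 *   #include <vector>
 *   #include <cstddef>
 *
 *   static const size_t kChunkSize = 16;
 *
 *   // Pop up to kChunkSize entries from the local queue into a stack
 *   // buffer, then publish them to the shared stack in one bulk push,
 *   // amortizing whatever synchronization that push needs.
 *   static void move_chunk(std::deque<int>& local_queue,
 *                          std::vector<int>& global_stack) {
 *     int buffer[kChunkSize];
 *     size_t n = 0;
 *     while (n < kChunkSize && !local_queue.empty()) {
 *       buffer[n++] = local_queue.front();
 *       local_queue.pop_front();
 *     }
 *     if (n > 0) {
 *       global_stack.insert(global_stack.end(), buffer, buffer + n);
 *     }
 *   }
 */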
3039 
3040 void CMTask::drain_local_queue(bool partially) {
3041   if (has_aborted()) return;
3042 
3043   // Decide what the target size is, depending on whether we're going to
3044   // drain it partially (so that other tasks can steal if they run out
3045   // of things to do) or totally (at the very end).
3046   size_t target_size;
3047   if (partially) {
3048     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3049   } else {
3050     target_size = 0;
3051   }
3052 
3053   if (_task_queue->size() > target_size) {
3054     oop obj;
3055     bool ret = _task_queue->pop_local(obj);
3056     while (ret) {
3057       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3058       assert(!_g1h->is_on_master_free_list(
3059                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3060 
3061       scan_object(obj);
3062 
3063       if (_task_queue->size() <= target_size || has_aborted()) {
3064         ret = false;
3065       } else {
3066         ret = _task_queue->pop_local(obj);
3067       }
3068     }
3069   }
3070 }
3071 
3072 void CMTask::drain_global_stack(bool partially) {
3073   if (has_aborted()) return;
3074 
3075   // We have a policy to drain the local queue before we attempt to
3076   // drain the global stack.
3077   assert(partially || _task_queue->size() == 0, "invariant");
3078 
3079   // Decide what the target size is, depending on whether we're going to
3080   // drain it partially (so that other tasks can steal if they run out
3081   // of things to do) or totally (at the very end).  Notice that,
3082   // because we move entries from the global stack in chunks or
3083   // because another task might be doing the same, we might in fact
3084   // drop below the target. But, this is not a problem.
3085   size_t target_size;
3086   if (partially) {
3087     target_size = _cm->partial_mark_stack_size_target();
3088   } else {
3089     target_size = 0;
3090   }
3091 
3092   if (_cm->mark_stack_size() > target_size) {
3093     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3094       get_entries_from_global_stack();
3095       drain_local_queue(partially);
3096     }
3097   }
3098 }
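
/*
 * Both drain methods above share the same policy: a partial drain (used
 * throughout marking) stops at a small, non-zero target so that other
 * workers still find entries to steal or pop, while a total drain (used
 * at the very end of a step) goes down to zero. A hedged sketch of that
 * policy, with an illustrative queue type, target value and process
 * helper (none of them the real HotSpot types):
 *
 *   #include <deque>
 *   #include <cstddef>
 *
 *   static const size_t kPartialDrainTarget = 16;   // assumed value
 *
 *   static void process_entry(int entry) { (void)entry; }
 *
 *   static void drain(std::deque<int>& queue, bool partially) {
 *     size_t target = partially ? kPartialDrainTarget : 0;
 *     while (queue.size() > target) {
 *       int entry = queue.front();
 *       queue.pop_front();
 *       process_entry(entry);   // scanning may push new entries too
 *     }
 *   }
 */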
3099 
3100 // The SATB queue set makes several assumptions about whether to call
3101 // the par or non-par versions of its methods. This is why some of the
3102 // code is replicated. We should really get rid of the single-threaded
3103 // version of the code to simplify things.
3104 void CMTask::drain_satb_buffers() {
3105   if (has_aborted()) return;
3106 
3107   // We set this so that the regular clock knows that we're in the
3108   // middle of draining buffers and doesn't set the abort flag when it
3109   // notices that SATB buffers are available for draining. It'd be
3110   // very counterproductive if it did that. :-)
3111   _draining_satb_buffers = true;
3112 
3113   CMSATBBufferClosure satb_cl(this, _g1h);
3114   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3115 
3116   // This keeps claiming and applying the closure to completed buffers
3117   // until we run out of buffers or we need to abort.
3118   while (!has_aborted() &&
3119          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3120     regular_clock_call();
3121   }
3122 
3123   _draining_satb_buffers = false;
3124 
3125   assert(has_aborted() ||
3126          concurrent() ||
3127          satb_mq_set.completed_buffers_num() == 0, "invariant");
3128 
3129   // again, this was a potentially expensive operation, decrease the
3130   // limits to get the regular clock call early
3131   decrease_limits();
3132 }
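
/*
 * The loop above is a plain claim-and-process pattern: keep claiming one
 * completed buffer at a time, apply the marking closure to its entries,
 * and stop as soon as no buffer is left or the task has to abort. A
 * self-contained sketch (the Buffer type, closure signature and container
 * are illustrative assumptions, not the SATBMarkQueueSet API):
 *
 *   #include <vector>
 *   #include <cstddef>
 *
 *   typedef std::vector<const void*> Buffer;
 *
 *   // Mirrors the shape of apply_closure_to_completed_buffer(): returns
 *   // true if a buffer was claimed and processed, false if none was left.
 *   template <typename Closure>
 *   static bool apply_to_one_completed_buffer(std::vector<Buffer>& completed,
 *                                             Closure& cl) {
 *     if (completed.empty()) {
 *       return false;
 *     }
 *     Buffer buf = completed.back();   // claim
 *     completed.pop_back();
 *     for (size_t i = 0; i < buf.size(); ++i) {
 *       cl(buf[i]);                    // process each recorded reference
 *     }
 *     return true;
 *   }
 */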
3133 
3134 void CMTask::print_stats() {
3135   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
3136                        _worker_id, _calls);
3137   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3138                        _elapsed_time_ms, _termination_time_ms);
3139   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3140                        _step_times_ms.num(), _step_times_ms.avg(),
3141                        _step_times_ms.sd());
3142   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
3143                        _step_times_ms.maximum(), _step_times_ms.sum());
3144 }
3145 
3146 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3147   return _task_queues->steal(worker_id, hash_seed, obj);
3148 }
3149 
3150 /*****************************************************************************
3151 
3152     The do_marking_step(time_target_ms, ...) method is the building
3153     block of the parallel marking framework. It can be called in parallel
3154     with other invocations of do_marking_step() on different tasks
3155     (but only one per task, obviously) and concurrently with the
3156     mutator threads, or during remark, hence it eliminates the need
3157     for two versions of the code. When called during remark, it will
3158     pick up from where the task left off during the concurrent marking
3159     phase. Interestingly, tasks are also claimable during evacuation
3160     pauses, since do_marking_step() ensures that it aborts before
3161     it needs to yield.
3162 
3163     The data structures that it uses to do marking work are the
3164     following:
3165 
3166       (1) Marking Bitmap. If there are gray objects that appear only
3167       on the bitmap (this happens either when dealing with an overflow
3168       or when the initial marking phase has simply marked the roots
3169       and didn't push them on the stack), then tasks claim heap
3170       regions whose bitmap they then scan to find gray objects. A
3171       global finger indicates where the end of the last claimed region
3172       is. A local finger indicates how far into the region a task has
3173       scanned. The two fingers are used to determine how to gray an
3174       object (i.e. whether simply marking it is OK, as it will be
3175       visited by a task in the future, or whether it needs to be also
3176       pushed on a stack).
3177 
3178       (2) Local Queue. The local queue of the task which is accessed
3179       reasonably efficiently by the task. Other tasks can steal from
3180       it when they run out of work. Throughout the marking phase, a
3181       task attempts to keep its local queue short but not totally
3182       empty, so that entries are available for stealing by other
3183       tasks. Only when there is no more work, a task will totally
3184       tasks. Only when there is no more work will a task totally
3185 
3186       (3) Global Mark Stack. This handles local queue overflow. During
3187       marking only sets of entries are moved between it and the local
3188       queues, as access to it requires a mutex, and more fine-grained
3189       interaction with it might cause contention. If it
3190       overflows, then the marking phase should restart and iterate
3191       over the bitmap to identify gray objects. Throughout the marking
3192       phase, tasks attempt to keep the global mark stack at a small
3193       length but not totally empty, so that entries are available for
3194       popping by other tasks. Only when there is no more work will
3195       tasks totally drain the global mark stack.
3196 
3197       (4) SATB Buffer Queue. This is where completed SATB buffers are
3198       made available. Buffers are regularly removed from this queue
3199       and scanned for roots, so that the queue doesn't get too
3200       long. During remark, all completed buffers are processed, as
3201       well as the filled in parts of any uncompleted buffers.
3202 
3203     The do_marking_step() method tries to abort when the time target
3204     has been reached. There are a few other cases when the
3205     do_marking_step() method also aborts:
3206 
3207       (1) When the marking phase has been aborted (after a Full GC).
3208 
3209       (2) When a global overflow (on the global stack) has been
3210       triggered. Before the task aborts, it will actually sync up with
3211       the other tasks to ensure that all the marking data structures
3212       (local queues, stacks, fingers etc.)  are re-initialized so that
3213       when do_marking_step() completes, the marking phase can
3214       immediately restart.
3215 
3216       (3) When enough completed SATB buffers are available. The
3217       do_marking_step() method only tries to drain SATB buffers right
3218       at the beginning. So, if enough buffers are available, the
3219       marking step aborts and the SATB buffers are processed at
3220       the beginning of the next invocation.
3221 
3222       (4) To yield. When we have to yield, we abort and do the yield
3223       right at the end of do_marking_step(). This saves us a lot of
3224       hassle: by yielding we might allow a Full GC, and if that
3225       happens then objects will be compacted underneath our feet, the
3226       heap might shrink, etc. We avoid checking for all this by just
3227       aborting and doing the yield right at the end.
3228 
3229     From the above it follows that the do_marking_step() method should
3230     be called in a loop (or, otherwise, regularly) until it completes.
3231 
3232     If a marking step completes without its has_aborted() flag being
3233     true, it means it has completed the current marking phase (and
3234     also all other marking tasks have done so and have all synced up).
3235 
3236     A method called regular_clock_call() is invoked "regularly" (in
3237     sub-ms intervals) throughout marking. It is this clock method that
3238     checks all the abort conditions which were mentioned above and
3239     decides when the task should abort. A work-based scheme is used to
3240     trigger this clock method: when the number of object words the
3241     marking phase has scanned or the number of references the marking
3242     phase has visited reach a given limit. Additional invocations of
3243     the clock method have been planted in a few other strategic places
3244     too. The initial reason for the clock method was to avoid calling
3245     vtime too regularly, as it is quite expensive. So, once it was in
3246     place, it was natural to piggy-back all the other conditions on it
3247     too and not constantly check them throughout the code.
3248 
3249     If do_termination is true then do_marking_step will enter its
3250     termination protocol.
3251 
3252     The value of is_serial must be true when do_marking_step is being
3253     called serially (i.e. by the VMThread) and do_marking_step should
3254     skip any synchronization in the termination and overflow code.
3255     Examples include the serial remark code and the serial reference
3256     processing closures.
3257 
3258     The value of is_serial must be false when do_marking_step is
3259     being called by any of the worker threads in a work gang.
3260     Examples include the concurrent marking code (CMMarkingTask),
3261     the MT remark code, and the MT reference processing closures.
3262 
3263  *****************************************************************************/
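
/*
 * As the comment above notes, do_marking_step() is meant to be invoked in
 * a loop until it completes without aborting. A schematic caller, heavily
 * simplified from the real marking tasks (the 10.0 ms target and the
 * omitted yield/safepoint handling are assumptions for illustration):
 *
 *   void marking_loop(CMTask* task) {
 *     do {
 *       task->do_marking_step(10.0,   // time target in ms (assumed value)
 *                             true,   // do_termination
 *                             false); // is_serial: called by a worker
 *       // If the step aborted (time quota, yield request, overflow or
 *       // pending SATB buffers), cooperate with any safepoint/yield
 *       // request here before retrying.
 *     } while (task->has_aborted());
 *   }
 */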
3264 
3265 void CMTask::do_marking_step(double time_target_ms,
3266                              bool do_termination,
3267                              bool is_serial) {
3268   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3269   assert(concurrent() == _cm->concurrent(), "they should be the same");
3270 
3271   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3272   assert(_task_queues != NULL, "invariant");
3273   assert(_task_queue != NULL, "invariant");
3274   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3275 
3276   assert(!_claimed,
3277          "only one thread should claim this task at any one time");
3278 
3279   // OK, this doesn't safeguard against all possible scenarios, as it is
3280   // possible for two threads to set the _claimed flag at the same
3281   // time. But it is only for debugging purposes anyway and it will
3282   // catch most problems.
3283   _claimed = true;
3284 
3285   _start_time_ms = os::elapsedVTime() * 1000.0;
3286 
3287   // If do_stealing is true then do_marking_step will attempt to
3288   // steal work from the other CMTasks. It only makes sense to
3289   // enable stealing when the termination protocol is enabled
3290   // and do_marking_step() is not being called serially.
3291   bool do_stealing = do_termination && !is_serial;
3292 
3293   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3294   _time_target_ms = time_target_ms - diff_prediction_ms;
3295 
3296   // set up the variables that are used in the work-based scheme to
3297   // call the regular clock method
3298   _words_scanned = 0;
3299   _refs_reached  = 0;
3300   recalculate_limits();
3301 
3302   // clear all flags
3303   clear_has_aborted();
3304   _has_timed_out = false;
3305   _draining_satb_buffers = false;
3306 
3307   ++_calls;
3308 
3309   // Set up the bitmap and oop closures. Anything that uses them is
3310   // eventually called from this method, so it is OK to allocate these
3311   // locally, on this method's stack frame.
3312   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3313   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3314   set_cm_oop_closure(&cm_oop_closure);
3315 
3316   if (_cm->has_overflown()) {
3317     // This can happen if the mark stack overflows during a GC pause
3318     // and this task, after a yield point, restarts. We have to abort
3319     // as we need to get into the overflow protocol which happens
3320     // right at the end of this task.
3321     set_has_aborted();
3322   }
3323 
3324   // First drain any available SATB buffers. After this, we will not
3325   // look at SATB buffers before the next invocation of this method.
3326   // If enough completed SATB buffers are queued up, the regular clock
3327   // will abort this task so that it restarts.
3328   drain_satb_buffers();
3329   // ...then partially drain the local queue and the global stack
3330   drain_local_queue(true);
3331   drain_global_stack(true);
3332 
3333   do {
3334     if (!has_aborted() && _curr_region != NULL) {
3335       // This means that we're already holding on to a region.
3336       assert(_finger != NULL, "if region is not NULL, then the finger "
3337              "should not be NULL either");
3338 
3339       // We might have restarted this task after an evacuation pause
3340       // which might have evacuated the region we're holding on to
3341       // underneath our feet. Let's read its limit again to make sure
3342       // that we do not iterate over a region of the heap that
3343       // contains garbage (update_region_limit() will also move
3344       // _finger to the start of the region if it is found empty).
3345       update_region_limit();
3346       // We will start from _finger not from the start of the region,
3347       // as we might be restarting this task after aborting half-way
3348       // through scanning this region. In this case, _finger points to
3349       // the address where we last found a marked object. If this is a
3350       // fresh region, _finger points to the region's bottom().
3351       MemRegion mr = MemRegion(_finger, _region_limit);
3352 
3353       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3354              "humongous regions should go around loop once only");
3355 
3356       // Some special cases:
3357       // If the memory region is empty, we can just give up the region.
3358       // If the current region is humongous then we only need to check
3359       // the bitmap for the bit associated with the start of the object,
3360       // scan the object if it's live, and give up the region.
3361       // Otherwise, let's iterate over the bitmap of the part of the region
3362       // that is left.
3363       // If the iteration is successful, give up the region.
3364       if (mr.is_empty()) {
3365         giveup_current_region();
3366         regular_clock_call();
3367       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3368         if (_nextMarkBitMap->isMarked(mr.start())) {
3369           // The object is marked - apply the closure
3370           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3371           bitmap_closure.do_bit(offset);
3372         }
3373         // Even if this task aborted while scanning the humongous object
3374         // we can (and should) give up the current region.
3375         giveup_current_region();
3376         regular_clock_call();
3377       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3378         giveup_current_region();
3379         regular_clock_call();
3380       } else {
3381         assert(has_aborted(), "currently the only way to do so");
3382         // The only way to abort the bitmap iteration is to return
3383         // false from the do_bit() method. However, inside the
3384         // do_bit() method we move the _finger to point to the
3385         // object currently being looked at. So, if we bail out, we
3386         // have definitely set _finger to something non-null.
3387         assert(_finger != NULL, "invariant");
3388 
3389         // Region iteration was actually aborted. So now _finger
3390         // points to the address of the object we last scanned. If we
3391         // leave it there, when we restart this task, we will rescan
3392         // the object. It is easy to avoid this. We move the finger by
3393         // enough to point to the next possible object header (the
3394         // bitmap knows by how much we need to move it as it knows its
3395         // granularity).
3396         assert(_finger < _region_limit, "invariant");
3397         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3398         // Check if bitmap iteration was aborted while scanning the last object
3399         if (new_finger >= _region_limit) {
3400           giveup_current_region();
3401         } else {
3402           move_finger_to(new_finger);
3403         }
3404       }
3405     }
3406     // At this point we have either completed iterating over the
3407     // region we were holding on to, or we have aborted.
3408 
3409     // We then partially drain the local queue and the global stack.
3410     // (Do we really need this?)
3411     drain_local_queue(true);
3412     drain_global_stack(true);
3413 
3414     // Read the note on the claim_region() method on why it might
3415     // return NULL with potentially more regions available for
3416     // claiming and why we have to check out_of_regions() to determine
3417     // whether we're done or not.
3418     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3419       // We are going to try to claim a new region. We should have
3420       // given up on the previous one.
3421       // Separated the asserts so that we know which one fires.
3422       assert(_curr_region  == NULL, "invariant");
3423       assert(_finger       == NULL, "invariant");
3424       assert(_region_limit == NULL, "invariant");
3425       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3426       if (claimed_region != NULL) {
3427         // Yes, we managed to claim one
3428         setup_for_region(claimed_region);
3429         assert(_curr_region == claimed_region, "invariant");
3430       }
3431       // It is important to call the regular clock here. It might take
3432       // a while to claim a region if, for example, we hit a large
3433       // block of empty regions. So we need to call the regular clock
3434       // method once round the loop to make sure it's called
3435       // frequently enough.
3436       regular_clock_call();
3437     }
3438 
3439     if (!has_aborted() && _curr_region == NULL) {
3440       assert(_cm->out_of_regions(),
3441              "at this point we should be out of regions");
3442     }
3443   } while (_curr_region != NULL && !has_aborted());
3444 
3445   if (!has_aborted()) {
3446     // We cannot check whether the global stack is empty, since other
3447     // tasks might be pushing objects to it concurrently.
3448     assert(_cm->out_of_regions(),
3449            "at this point we should be out of regions");
3450     // Try to reduce the number of available SATB buffers so that
3451     // remark has less work to do.
3452     drain_satb_buffers();
3453   }
3454 
3455   // Since we've done everything else, we can now totally drain the
3456   // local queue and global stack.
3457   drain_local_queue(false);
3458   drain_global_stack(false);
3459 
3460   // Attempt at work stealing from other tasks' queues.
3461   if (do_stealing && !has_aborted()) {
3462     // We have not aborted. This means that we have finished all that
3463     // we could. Let's try to do some stealing...
3464 
3465     // We cannot check whether the global stack is empty, since other
3466     // tasks might be pushing objects to it concurrently.
3467     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3468            "only way to reach here");
3469     while (!has_aborted()) {
3470       oop obj;
3471       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3472         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3473                "any stolen object should be marked");
3474         scan_object(obj);
3475 
3476         // And since we're towards the end, let's totally drain the
3477         // local queue and global stack.
3478         drain_local_queue(false);
3479         drain_global_stack(false);
3480       } else {
3481         break;
3482       }
3483     }
3484   }
3485 
3486   // If we are about to wrap up and go into termination, check if we
3487   // should raise the overflow flag.
3488   if (do_termination && !has_aborted()) {
3489     if (_cm->force_overflow()->should_force()) {
3490       _cm->set_has_overflown();
3491       regular_clock_call();
3492     }
3493   }
3494 
3495   // We still haven't aborted. Now, let's try to get into the
3496   // termination protocol.
3497   if (do_termination && !has_aborted()) {
3498     // We cannot check whether the global stack is empty, since other
3499     // tasks might be concurrently pushing objects on it.
3500     // Separated the asserts so that we know which one fires.
3501     assert(_cm->out_of_regions(), "only way to reach here");
3502     assert(_task_queue->size() == 0, "only way to reach here");
3503     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3504 
3505     // The CMTask class also extends the TerminatorTerminator class,
3506     // hence its should_exit_termination() method will also decide
3507     // whether to exit the termination protocol or not.
3508     bool finished = (is_serial ||
3509                      _cm->terminator()->offer_termination(this));
3510     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3511     _termination_time_ms +=
3512       termination_end_time_ms - _termination_start_time_ms;
3513 
3514     if (finished) {
3515       // We're all done.
3516 
3517       if (_worker_id == 0) {
3518         // let's allow task 0 to do this
3519         if (concurrent()) {
3520           assert(_cm->concurrent_marking_in_progress(), "invariant");
3521           // we need to set this to false before the next
3522           // safepoint. This way we ensure that the marking phase
3523           // doesn't observe any more heap expansions.
3524           _cm->clear_concurrent_marking_in_progress();
3525         }
3526       }
3527 
3528       // We can now guarantee that the global stack is empty, since
3529       // all other tasks have finished. We separated the guarantees so
3530       // that, if a condition is false, we can immediately find out
3531       // which one.
3532       guarantee(_cm->out_of_regions(), "only way to reach here");
3533       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3534       guarantee(_task_queue->size() == 0, "only way to reach here");
3535       guarantee(!_cm->has_overflown(), "only way to reach here");
3536       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3537     } else {
3538       // Apparently there's more work to do. Let's abort this task. Its
3539       // caller will restart it and we can hopefully find more things to do.
3540       set_has_aborted();
3541     }
3542   }
3543 
3544   // Mainly for debugging purposes to make sure that a pointer to the
3545   // closure which was stack-allocated in this frame doesn't
3546   // escape it by accident.
3547   set_cm_oop_closure(NULL);
3548   double end_time_ms = os::elapsedVTime() * 1000.0;
3549   double elapsed_time_ms = end_time_ms - _start_time_ms;
3550   // Update the step history.
3551   _step_times_ms.add(elapsed_time_ms);
3552 
3553   if (has_aborted()) {
3554     // The task was aborted for some reason.
3555     if (_has_timed_out) {
3556       double diff_ms = elapsed_time_ms - _time_target_ms;
3557       // Keep statistics of how well we did with respect to hitting
3558       // our target only if we actually timed out (if we aborted for
3559       // other reasons, then the results might get skewed).
3560       _marking_step_diffs_ms.add(diff_ms);
3561     }
3562 
3563     if (_cm->has_overflown()) {
3564       // This is the interesting one. We aborted because a global
3565       // overflow was raised. This means we have to restart the
3566       // marking phase and start iterating over regions. However, in
3567       // order to do this we have to make sure that all tasks stop
3568       // what they are doing and re-initialize in a safe manner. We
3569       // will achieve this with the use of two barrier sync points.
3570 
3571       if (!is_serial) {
3572         // We only need to enter the sync barrier if being called
3573         // from a parallel context
3574         _cm->enter_first_sync_barrier(_worker_id);
3575 
3576         // When we exit this sync barrier we know that all tasks have
3577         // stopped doing marking work. So, it's now safe to
3578         // re-initialize our data structures. At the end of this method,
3579         // task 0 will clear the global data structures.
3580       }
3581 
3582       // We clear the local state of this task...
3583       clear_region_fields();
3584 
3585       if (!is_serial) {
3586         // ...and enter the second barrier.
3587         _cm->enter_second_sync_barrier(_worker_id);
3588       }
3589       // At this point, if we're during the concurrent phase of
3590       // marking, everything has been re-initialized and we're
3591       // ready to restart.
3592     }
3593   }
3594 
3595   _claimed = false;
3596 }
3597 
3598 CMTask::CMTask(uint worker_id,
3599                ConcurrentMark* cm,
3600                size_t* marked_bytes,
3601                BitMap* card_bm,
3602                CMTaskQueue* task_queue,
3603                CMTaskQueueSet* task_queues)
3604   : _g1h(G1CollectedHeap::heap()),
3605     _worker_id(worker_id), _cm(cm),
3606     _claimed(false),
3607     _nextMarkBitMap(NULL), _hash_seed(17),
3608     _task_queue(task_queue),
3609     _task_queues(task_queues),
3610     _cm_oop_closure(NULL),
3611     _marked_bytes_array(marked_bytes),
3612     _card_bm(card_bm) {
3613   guarantee(task_queue != NULL, "invariant");
3614   guarantee(task_queues != NULL, "invariant");
3615 
3616   _marking_step_diffs_ms.add(0.5);
3617 }
3618 
3619 // These are formatting macros that are used below to ensure
3620 // consistent formatting. The *_H_* versions are used to format the
3621 // header for a particular value and they should be kept consistent
3622 // with the corresponding macro. Also note that most of the macros add
3623 // the necessary white space (as a prefix) which makes them a bit
3624 // easier to compose.
3625 
3626 // All the output lines are prefixed with this string to be able to
3627 // identify them easily in a large log file.
3628 #define G1PPRL_LINE_PREFIX            "###"
3629 
3630 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3631 #ifdef _LP64
3632 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3633 #else // _LP64
3634 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3635 #endif // _LP64
3636 
3637 // For per-region info
3638 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3639 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3640 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3641 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3642 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3643 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3644 
3645 // For summary info
3646 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3647 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3648 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3649 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
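
// As a worked example of how these compose (illustrative only, not emitted
// by the code): on an _LP64 build, G1PPRL_TYPE_H_FORMAT followed by
// G1PPRL_ADDR_BASE_H_FORMAT concatenates to the format string
// "   %4s %37s", and G1PPRL_SUM_MB_PERC_FORMAT("used") expands to
// "  used: %1.2f MB / %1.2f %%". This is why every value macro above is
// paired with a *_H_* header macro of matching width.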
3650 
3651 G1PrintRegionLivenessInfoClosure::
3652 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3653   : _total_used_bytes(0), _total_capacity_bytes(0),
3654     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3655     _hum_used_bytes(0), _hum_capacity_bytes(0),
3656     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3657     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3658   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3659   MemRegion g1_reserved = g1h->g1_reserved();
3660   double now = os::elapsedTime();
3661 
3662   // Print the header of the output.
3663   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3664   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3665                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3666                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3667                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3668                           HeapRegion::GrainBytes);
3669   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3670   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3671                           G1PPRL_TYPE_H_FORMAT
3672                           G1PPRL_ADDR_BASE_H_FORMAT
3673                           G1PPRL_BYTE_H_FORMAT
3674                           G1PPRL_BYTE_H_FORMAT
3675                           G1PPRL_BYTE_H_FORMAT
3676                           G1PPRL_DOUBLE_H_FORMAT
3677                           G1PPRL_BYTE_H_FORMAT
3678                           G1PPRL_BYTE_H_FORMAT,
3679                           "type", "address-range",
3680                           "used", "prev-live", "next-live", "gc-eff",
3681                           "remset", "code-roots");
3682   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3683                           G1PPRL_TYPE_H_FORMAT
3684                           G1PPRL_ADDR_BASE_H_FORMAT
3685                           G1PPRL_BYTE_H_FORMAT
3686                           G1PPRL_BYTE_H_FORMAT
3687                           G1PPRL_BYTE_H_FORMAT
3688                           G1PPRL_DOUBLE_H_FORMAT
3689                           G1PPRL_BYTE_H_FORMAT
3690                           G1PPRL_BYTE_H_FORMAT,
3691                           "", "",
3692                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3693                           "(bytes)", "(bytes)");
3694 }
3695 
3696 // It takes as a parameter a reference to one of the _hum_* fields,
3697 // deduces the corresponding value for a region in a humongous region
3698 // series (either the region size, or what's left if the _hum_* field
3699 // is < the region size), and updates the _hum_* field accordingly.
3700 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3701   size_t bytes = 0;
3702   // The > 0 check is to deal with the prev and next live bytes which
3703   // could be 0.
3704   if (*hum_bytes > 0) {
3705     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3706     *hum_bytes -= bytes;
3707   }
3708   return bytes;
3709 }
3710 
3711 // It deduces the values for a region in a humongous region series
3712 // from the _hum_* fields and updates those accordingly. It assumes
3713 // that the _hum_* fields have already been set up from the "starts
3714 // humongous" region and we visit the regions in address order.
3715 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3716                                                      size_t* capacity_bytes,
3717                                                      size_t* prev_live_bytes,
3718                                                      size_t* next_live_bytes) {
3719   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3720   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3721   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3722   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3723   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3724 }
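
/*
 * A worked example of the humongous accounting above (the numbers are
 * illustrative and assume a 1M region, with the "starts humongous" region
 * reporting the totals for the whole series as the comments describe): a
 * 2500K humongous object spans three regions. doHeapRegion() seeds
 * _hum_used_bytes with 2500K at the "starts humongous" region, and the
 * successive get_hum_bytes() calls hand out MIN2(GrainBytes, remaining):
 * 1024K for the first region, 1024K for the second and 452K for the
 * third, leaving _hum_used_bytes at zero for the next series.
 */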
3725 
3726 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3727   const char* type       = r->get_type_str();
3728   HeapWord* bottom       = r->bottom();
3729   HeapWord* end          = r->end();
3730   size_t capacity_bytes  = r->capacity();
3731   size_t used_bytes      = r->used();
3732   size_t prev_live_bytes = r->live_bytes();
3733   size_t next_live_bytes = r->next_live_bytes();
3734   double gc_eff          = r->gc_efficiency();
3735   size_t remset_bytes    = r->rem_set()->mem_size();
3736   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3737 
3738   if (r->is_starts_humongous()) {
3739     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3740            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3741            "they should have been zeroed after the last time we used them");
3742     // Set up the _hum_* fields.
3743     _hum_capacity_bytes  = capacity_bytes;
3744     _hum_used_bytes      = used_bytes;
3745     _hum_prev_live_bytes = prev_live_bytes;
3746     _hum_next_live_bytes = next_live_bytes;
3747     get_hum_bytes(&used_bytes, &capacity_bytes,
3748                   &prev_live_bytes, &next_live_bytes);
3749     end = bottom + HeapRegion::GrainWords;
3750   } else if (r->is_continues_humongous()) {
3751     get_hum_bytes(&used_bytes, &capacity_bytes,
3752                   &prev_live_bytes, &next_live_bytes);
3753     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3754   }
3755 
3756   _total_used_bytes      += used_bytes;
3757   _total_capacity_bytes  += capacity_bytes;
3758   _total_prev_live_bytes += prev_live_bytes;
3759   _total_next_live_bytes += next_live_bytes;
3760   _total_remset_bytes    += remset_bytes;
3761   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3762 
3763   // Print a line for this particular region.
3764   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3765                           G1PPRL_TYPE_FORMAT
3766                           G1PPRL_ADDR_BASE_FORMAT
3767                           G1PPRL_BYTE_FORMAT
3768                           G1PPRL_BYTE_FORMAT
3769                           G1PPRL_BYTE_FORMAT
3770                           G1PPRL_DOUBLE_FORMAT
3771                           G1PPRL_BYTE_FORMAT
3772                           G1PPRL_BYTE_FORMAT,
3773                           type, p2i(bottom), p2i(end),
3774                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3775                           remset_bytes, strong_code_roots_bytes);
3776 
3777   return false;
3778 }
3779 
3780 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3781   // Add static memory usage to the remembered set sizes.
3782   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3783   // Print the footer of the output.
3784   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3785   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3786                          " SUMMARY"
3787                          G1PPRL_SUM_MB_FORMAT("capacity")
3788                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3789                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3790                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3791                          G1PPRL_SUM_MB_FORMAT("remset")
3792                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3793                          bytes_to_mb(_total_capacity_bytes),
3794                          bytes_to_mb(_total_used_bytes),
3795                          perc(_total_used_bytes, _total_capacity_bytes),
3796                          bytes_to_mb(_total_prev_live_bytes),
3797                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3798                          bytes_to_mb(_total_next_live_bytes),
3799                          perc(_total_next_live_bytes, _total_capacity_bytes),
3800                          bytes_to_mb(_total_remset_bytes),
3801                          bytes_to_mb(_total_strong_code_roots_bytes));
3802 }