/*
 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/concurrentMark.inline.hpp"
#include "gc/g1/concurrentMarkThread.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorPolicy.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ErgoVerbose.hpp"
#include "gc/g1/g1Log.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
  if (zero_filled) {
    return;
  }
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer     _hrclaimer;
  bool                  _suspendible; // If the task is suspendible, workers must join the STS.

 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      _cl(cl), _suspendible(suspendible), AbstractGangTask("Parallel Clear Bitmap Task"), _hrclaimer(n_workers) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
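  // If the scan was aborted, some survivors may not have been claimed,
  // so only check that every survivor was claimed when the scan ran to
  // completion.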
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(ParallelGCThreads),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (%u) "
            "than ParallelGCThreads (%u).",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
    // if both are set
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else if (G1MarkingOverheadPercent > 0) {
    // We will calculate the number of parallel marking threads based
    // on a target overhead with respect to the soft real-time goal
    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
    double overall_cm_overhead =
      (double) MaxGCPauseMillis * marking_overhead /
      (double) GCPauseIntervalMillis;
    double cpu_ratio = 1.0 / (double) os::processor_count();
    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num *
      (double) os::processor_count();
    double sleep_factor =
                       (1.0 - marking_task_overhead) / marking_task_overhead;

    FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
    _sleep_factor             = sleep_factor;
    _marking_task_overhead    = marking_task_overhead;
  } else {
    // Calculate the number of parallel marking threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  }

  assert(ConcGCThreads > 0, "Should have been set");
  _parallel_marking_threads = ConcGCThreads;
  _max_parallel_marking_threads = _parallel_marking_threads;

  if (parallel_marking_threads() > 1) {
    _cleanup_task_overhead = 1.0;
  } else {
    _cleanup_task_overhead = marking_task_overhead();
  }
  _cleanup_sleep_factor =
                   (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
  gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
  gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
  gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

  _parallel_workers = new WorkGang("G1 Marker",
       _max_parallel_marking_threads, false, true);
  if (_parallel_workers == NULL) {
    vm_exit_during_initialization("Failed necessary allocation.");
  } else {
    _parallel_workers->initialize_workers();
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
              "must be between 1 and " SIZE_FORMAT,
              mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                  "must be between 1 and " SIZE_FORMAT,
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                  " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                                CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap_end));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
  _parallel_workers->run_task(&task);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // We can use the region's orig_end() for the limit and the comparison value
    // as it always contains the "real" end of the region that never changes and
    // has no side effects.
    // Due to the latter, there can also be no problem with the compiler generating
    // reloads of the orig_end() call.
    HeapWord* end = r->orig_end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->is_continues_humongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards.
 * If we do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended so that a Full GC or an evacuation
 * pause may occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp();
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial*/);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
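// The result honors UseDynamicNumberOfGCThreads and never exceeds
// _max_parallel_marking_threads.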
uint ConcurrentMark::calc_parallel_marking_threads() {
  uint n_conc_workers = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    n_conc_workers = max_parallel_marking_threads();
  } else {
    n_conc_workers =
      AdaptiveSizePolicy::calc_default_active_workers(
                                   max_parallel_marking_threads(),
                                   1, /* Minimum workers */
                                   parallel_marking_threads(),
                                   Threads::number_of_non_daemon_threads());
    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    // that scaling has already gone into "_max_parallel_marking_threads".
  }
  assert(n_conc_workers > 0, "Always need at least 1");
  return n_conc_workers;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  double scan_start = os::elapsedTime();

  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
    }

    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    _parallel_workers->set_active_workers(active_workers);
    _parallel_workers->run_task(&task);

    if (G1Log::fine()) {
      gclog_or_tty->gclog_stamp();
      gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());
  assert(active_workers > 0, "Should have been set");

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  _parallel_workers->set_active_workers(active_workers);
  _parallel_workers->run_task(&markingTask);
  print_stats();
}

// Helper class to get rid of some boilerplate code.
class G1CMTraceTime : public StackObj {
  GCTraceTimeImpl _gc_trace_time;
  static bool doit_and_prepend(bool doit) {
    if (doit) {
      gclog_or_tty->put(' ');
    }
    return doit;
  }

 public:
  G1CMTraceTime(const char* title, bool doit)
    : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
  }
};

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    g1h->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    {
      G1CMTraceTime trace("GC aggregate-data", G1Log::finer());

      // Aggregate the per-task counting data that we have accumulated
      // while marking.
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->is_continues_humongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    if (!hr->is_starts_humongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
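// The per-region marked byte count it computes is compared against the
// region's next_marked_bytes() by the verification closure below.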
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        failures += 1;
      }
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;

  HeapRegionClaimer _hrclaimer;

 public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0),
      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
    assert(VerifyDuringGC, "don't call this otherwise");
    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm);

    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
 public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->is_continues_humongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
1596 set_bit_for_region(hr); 1597 1598 // Now set the bits in the card bitmap for [ntams, top) 1599 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1600 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1601 1602 // Note: if we're looking at the last region in heap - top 1603 // could be actually just beyond the end of the heap; end_idx 1604 // will then correspond to a (non-existent) card that is also 1605 // just beyond the heap. 1606 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1607 // end of object is not card aligned - increment to cover 1608 // all the cards spanned by the object 1609 end_idx += 1; 1610 } 1611 1612 assert(end_idx <= _card_bm->size(), 1613 "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1614 end_idx, _card_bm->size()); 1615 assert(start_idx < _card_bm->size(), 1616 "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1617 start_idx, _card_bm->size()); 1618 1619 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1620 } 1621 1622 // Set the bit for the region if it contains live data 1623 if (hr->next_marked_bytes() > 0) { 1624 set_bit_for_region(hr); 1625 } 1626 1627 return false; 1628 } 1629 }; 1630 1631 class G1ParFinalCountTask: public AbstractGangTask { 1632 protected: 1633 G1CollectedHeap* _g1h; 1634 ConcurrentMark* _cm; 1635 BitMap* _actual_region_bm; 1636 BitMap* _actual_card_bm; 1637 1638 uint _n_workers; 1639 HeapRegionClaimer _hrclaimer; 1640 1641 public: 1642 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1643 : AbstractGangTask("G1 final counting"), 1644 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1645 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1646 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1647 } 1648 1649 void work(uint worker_id) { 1650 assert(worker_id < _n_workers, "invariant"); 1651 1652 FinalCountDataUpdateClosure final_update_cl(_g1h, 1653 _actual_region_bm, 1654 _actual_card_bm); 1655 1656 _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer); 1657 } 1658 }; 1659 1660 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1661 G1CollectedHeap* _g1; 1662 size_t _freed_bytes; 1663 FreeRegionList* _local_cleanup_list; 1664 HeapRegionSetCount _old_regions_removed; 1665 HeapRegionSetCount _humongous_regions_removed; 1666 HRRSCleanupTask* _hrrs_cleanup_task; 1667 1668 public: 1669 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1670 FreeRegionList* local_cleanup_list, 1671 HRRSCleanupTask* hrrs_cleanup_task) : 1672 _g1(g1), 1673 _freed_bytes(0), 1674 _local_cleanup_list(local_cleanup_list), 1675 _old_regions_removed(), 1676 _humongous_regions_removed(), 1677 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1678 1679 size_t freed_bytes() { return _freed_bytes; } 1680 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1681 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1682 1683 bool doHeapRegion(HeapRegion *hr) { 1684 if (hr->is_continues_humongous() || hr->is_archive()) { 1685 return false; 1686 } 1687 // We use a claim value of zero here because all regions 1688 // were claimed with value 1 in the FinalCount task. 
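    // In outline, the decision taken for each region below (a recap of the
    // branch that follows, not extra logic):
    //   used() > 0 && max_live_bytes() == 0 && !is_young()
    //       -> the region is completely garbage: it is freed, either via
    //          free_humongous_region() (for "starts humongous" regions) or
    //          free_region(), and added to the local cleanup list;
    //   otherwise
    //       -> the region is kept and its remembered set is queued for
    //          cleanup via do_cleanup_work().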
1689 _g1->reset_gc_time_stamps(hr); 1690 hr->note_end_of_marking(); 1691 1692 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1693 _freed_bytes += hr->used(); 1694 hr->set_containing_set(NULL); 1695 if (hr->is_humongous()) { 1696 assert(hr->is_starts_humongous(), "we should only see starts humongous"); 1697 _humongous_regions_removed.increment(1u, hr->capacity()); 1698 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1699 } else { 1700 _old_regions_removed.increment(1u, hr->capacity()); 1701 _g1->free_region(hr, _local_cleanup_list, true); 1702 } 1703 } else { 1704 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1705 } 1706 1707 return false; 1708 } 1709 }; 1710 1711 class G1ParNoteEndTask: public AbstractGangTask { 1712 friend class G1NoteEndOfConcMarkClosure; 1713 1714 protected: 1715 G1CollectedHeap* _g1h; 1716 FreeRegionList* _cleanup_list; 1717 HeapRegionClaimer _hrclaimer; 1718 1719 public: 1720 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1721 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1722 } 1723 1724 void work(uint worker_id) { 1725 FreeRegionList local_cleanup_list("Local Cleanup List"); 1726 HRRSCleanupTask hrrs_cleanup_task; 1727 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1728 &hrrs_cleanup_task); 1729 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1730 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1731 1732 // Now update the lists 1733 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1734 { 1735 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1736 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1737 1738 // If we iterate over the global cleanup list at the end of 1739 // cleanup to do this printing we will not guarantee to only 1740 // generate output for the newly-reclaimed regions (the list 1741 // might not be empty at the beginning of cleanup; we might 1742 // still be working on its previous contents). So we do the 1743 // printing here, before we append the new regions to the global 1744 // cleanup list. 
1745 1746 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1747 if (hr_printer->is_active()) { 1748 FreeRegionListIterator iter(&local_cleanup_list); 1749 while (iter.more_available()) { 1750 HeapRegion* hr = iter.get_next(); 1751 hr_printer->cleanup(hr); 1752 } 1753 } 1754 1755 _cleanup_list->add_ordered(&local_cleanup_list); 1756 assert(local_cleanup_list.is_empty(), "post-condition"); 1757 1758 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1759 } 1760 } 1761 }; 1762 1763 class G1ParScrubRemSetTask: public AbstractGangTask { 1764 protected: 1765 G1RemSet* _g1rs; 1766 BitMap* _region_bm; 1767 BitMap* _card_bm; 1768 HeapRegionClaimer _hrclaimer; 1769 1770 public: 1771 G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) : 1772 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { 1773 } 1774 1775 void work(uint worker_id) { 1776 _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer); 1777 } 1778 1779 }; 1780 1781 void ConcurrentMark::cleanup() { 1782 // world is stopped at this checkpoint 1783 assert(SafepointSynchronize::is_at_safepoint(), 1784 "world should be stopped"); 1785 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1786 1787 // If a full collection has happened, we shouldn't do this. 1788 if (has_aborted()) { 1789 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1790 return; 1791 } 1792 1793 g1h->verify_region_sets_optional(); 1794 1795 if (VerifyDuringGC) { 1796 HandleMark hm; // handle scope 1797 g1h->prepare_for_verify(); 1798 Universe::verify(VerifyOption_G1UsePrevMarking, 1799 " VerifyDuringGC:(before)"); 1800 } 1801 g1h->check_bitmaps("Cleanup Start"); 1802 1803 G1CollectorPolicy* g1p = g1h->g1_policy(); 1804 g1p->record_concurrent_mark_cleanup_start(); 1805 1806 double start = os::elapsedTime(); 1807 1808 HeapRegionRemSet::reset_for_cleanup_tasks(); 1809 1810 // Do counting once more with the world stopped for good measure. 1811 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1812 1813 g1h->workers()->run_task(&g1_par_count_task); 1814 1815 if (VerifyDuringGC) { 1816 // Verify that the counting data accumulated during marking matches 1817 // that calculated by walking the marking bitmap. 1818 1819 // Bitmaps to hold expected values 1820 BitMap expected_region_bm(_region_bm.size(), true); 1821 BitMap expected_card_bm(_card_bm.size(), true); 1822 1823 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1824 &_region_bm, 1825 &_card_bm, 1826 &expected_region_bm, 1827 &expected_card_bm); 1828 1829 g1h->workers()->run_task(&g1_par_verify_task); 1830 1831 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1832 } 1833 1834 size_t start_used_bytes = g1h->used(); 1835 g1h->collector_state()->set_mark_in_progress(false); 1836 1837 double count_end = os::elapsedTime(); 1838 double this_final_counting_time = (count_end - start); 1839 _total_counting_time += this_final_counting_time; 1840 1841 if (G1PrintRegionLivenessInfo) { 1842 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1843 _g1h->heap_region_iterate(&cl); 1844 } 1845 1846 // Install newly created mark bitMap as "prev". 1847 swapMarkBitMaps(); 1848 1849 g1h->reset_gc_time_stamp(); 1850 1851 uint n_workers = _g1h->workers()->active_workers(); 1852 1853 // Note end of marking in all heap regions. 
1854 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); 1855 g1h->workers()->run_task(&g1_par_note_end_task); 1856 g1h->check_gc_time_stamps(); 1857 1858 if (!cleanup_list_is_empty()) { 1859 // The cleanup list is not empty, so we'll have to process it 1860 // concurrently. Notify anyone else that might be wanting free 1861 // regions that there will be more free regions coming soon. 1862 g1h->set_free_regions_coming(); 1863 } 1864 1865 // call below, since it affects the metric by which we sort the heap 1866 // regions. 1867 if (G1ScrubRemSets) { 1868 double rs_scrub_start = os::elapsedTime(); 1869 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers); 1870 g1h->workers()->run_task(&g1_par_scrub_rs_task); 1871 1872 double rs_scrub_end = os::elapsedTime(); 1873 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 1874 _total_rs_scrub_time += this_rs_scrub_time; 1875 } 1876 1877 // this will also free any regions totally full of garbage objects, 1878 // and sort the regions. 1879 g1h->g1_policy()->record_concurrent_mark_cleanup_end(); 1880 1881 // Statistics. 1882 double end = os::elapsedTime(); 1883 _cleanup_times.add((end - start) * 1000.0); 1884 1885 if (G1Log::fine()) { 1886 g1h->g1_policy()->print_heap_transition(start_used_bytes); 1887 } 1888 1889 // Clean up will have freed any regions completely full of garbage. 1890 // Update the soft reference policy with the new heap occupancy. 1891 Universe::update_heap_info_at_gc(); 1892 1893 if (VerifyDuringGC) { 1894 HandleMark hm; // handle scope 1895 g1h->prepare_for_verify(); 1896 Universe::verify(VerifyOption_G1UsePrevMarking, 1897 " VerifyDuringGC:(after)"); 1898 } 1899 1900 g1h->check_bitmaps("Cleanup End"); 1901 1902 g1h->verify_region_sets_optional(); 1903 1904 // We need to make this be a "collection" so any collection pause that 1905 // races with it goes around and waits for completeCleanup to finish. 1906 g1h->increment_total_collections(); 1907 1908 // Clean out dead classes and update Metaspace sizes. 1909 if (ClassUnloadingWithConcurrentMark) { 1910 ClassLoaderDataGraph::purge(); 1911 } 1912 MetaspaceGC::compute_new_size(); 1913 1914 // We reclaimed old regions so we should calculate the sizes to make 1915 // sure we update the old gen/space data. 1916 g1h->g1mm()->update_sizes(); 1917 g1h->allocation_context_stats().update_after_mark(); 1918 1919 g1h->trace_heap_after_concurrent_cycle(); 1920 } 1921 1922 void ConcurrentMark::completeCleanup() { 1923 if (has_aborted()) return; 1924 1925 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1926 1927 _cleanup_list.verify_optional(); 1928 FreeRegionList tmp_free_list("Tmp Free List"); 1929 1930 if (G1ConcRegionFreeingVerbose) { 1931 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 1932 "cleanup list has %u entries", 1933 _cleanup_list.length()); 1934 } 1935 1936 // No one else should be accessing the _cleanup_list at this point, 1937 // so it is not necessary to take any locks 1938 while (!_cleanup_list.is_empty()) { 1939 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 1940 assert(hr != NULL, "Got NULL from a non-empty list"); 1941 hr->par_clear(); 1942 tmp_free_list.add_ordered(hr); 1943 1944 // Instead of adding one region at a time to the secondary_free_list, 1945 // we accumulate them in the local list and move them a few at a 1946 // time. This also cuts down on the number of notify_all() calls 1947 // we do during this process. 
We'll also append the local list when 1948 // _cleanup_list is empty (which means we just removed the last 1949 // region from the _cleanup_list). 1950 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 1951 _cleanup_list.is_empty()) { 1952 if (G1ConcRegionFreeingVerbose) { 1953 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 1954 "appending %u entries to the secondary_free_list, " 1955 "cleanup list still has %u entries", 1956 tmp_free_list.length(), 1957 _cleanup_list.length()); 1958 } 1959 1960 { 1961 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 1962 g1h->secondary_free_list_add(&tmp_free_list); 1963 SecondaryFreeList_lock->notify_all(); 1964 } 1965 #ifndef PRODUCT 1966 if (G1StressConcRegionFreeing) { 1967 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 1968 os::sleep(Thread::current(), (jlong) 1, false); 1969 } 1970 } 1971 #endif 1972 } 1973 } 1974 assert(tmp_free_list.is_empty(), "post-condition"); 1975 } 1976 1977 // Supporting Object and Oop closures for reference discovery 1978 // and processing in during marking 1979 1980 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1981 HeapWord* addr = (HeapWord*)obj; 1982 return addr != NULL && 1983 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 1984 } 1985 1986 // 'Keep Alive' oop closure used by both serial parallel reference processing. 1987 // Uses the CMTask associated with a worker thread (for serial reference 1988 // processing the CMTask for worker 0 is used) to preserve (mark) and 1989 // trace referent objects. 1990 // 1991 // Using the CMTask and embedded local queues avoids having the worker 1992 // threads operating on the global mark stack. This reduces the risk 1993 // of overflowing the stack - which we would rather avoid at this late 1994 // state. Also using the tasks' local queues removes the potential 1995 // of the workers interfering with each other that could occur if 1996 // operating on the global stack. 1997 1998 class G1CMKeepAliveAndDrainClosure: public OopClosure { 1999 ConcurrentMark* _cm; 2000 CMTask* _task; 2001 int _ref_counter_limit; 2002 int _ref_counter; 2003 bool _is_serial; 2004 public: 2005 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2006 _cm(cm), _task(task), _is_serial(is_serial), 2007 _ref_counter_limit(G1RefProcDrainInterval) { 2008 assert(_ref_counter_limit > 0, "sanity"); 2009 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2010 _ref_counter = _ref_counter_limit; 2011 } 2012 2013 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2014 virtual void do_oop( oop* p) { do_oop_work(p); } 2015 2016 template <class T> void do_oop_work(T* p) { 2017 if (!_cm->has_overflown()) { 2018 oop obj = oopDesc::load_decode_heap_oop(p); 2019 _task->deal_with_reference(obj); 2020 _ref_counter--; 2021 2022 if (_ref_counter == 0) { 2023 // We have dealt with _ref_counter_limit references, pushing them 2024 // and objects reachable from them on to the local stack (and 2025 // possibly the global stack). Call CMTask::do_marking_step() to 2026 // process these entries. 2027 // 2028 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2029 // there's nothing more to do (i.e. we're done with the entries that 2030 // were pushed as a result of the CMTask::deal_with_reference() calls 2031 // above) or we overflow. 
2032 // 2033 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2034 // flag while there may still be some work to do. (See the comment at 2035 // the beginning of CMTask::do_marking_step() for those conditions - 2036 // one of which is reaching the specified time target.) It is only 2037 // when CMTask::do_marking_step() returns without setting the 2038 // has_aborted() flag that the marking step has completed. 2039 do { 2040 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2041 _task->do_marking_step(mark_step_duration_ms, 2042 false /* do_termination */, 2043 _is_serial); 2044 } while (_task->has_aborted() && !_cm->has_overflown()); 2045 _ref_counter = _ref_counter_limit; 2046 } 2047 } 2048 } 2049 }; 2050 2051 // 'Drain' oop closure used by both serial and parallel reference processing. 2052 // Uses the CMTask associated with a given worker thread (for serial 2053 // reference processing the CMtask for worker 0 is used). Calls the 2054 // do_marking_step routine, with an unbelievably large timeout value, 2055 // to drain the marking data structures of the remaining entries 2056 // added by the 'keep alive' oop closure above. 2057 2058 class G1CMDrainMarkingStackClosure: public VoidClosure { 2059 ConcurrentMark* _cm; 2060 CMTask* _task; 2061 bool _is_serial; 2062 public: 2063 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2064 _cm(cm), _task(task), _is_serial(is_serial) { 2065 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2066 } 2067 2068 void do_void() { 2069 do { 2070 // We call CMTask::do_marking_step() to completely drain the local 2071 // and global marking stacks of entries pushed by the 'keep alive' 2072 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2073 // 2074 // CMTask::do_marking_step() is called in a loop, which we'll exit 2075 // if there's nothing more to do (i.e. we've completely drained the 2076 // entries that were pushed as a a result of applying the 'keep alive' 2077 // closure to the entries on the discovered ref lists) or we overflow 2078 // the global marking stack. 2079 // 2080 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2081 // flag while there may still be some work to do. (See the comment at 2082 // the beginning of CMTask::do_marking_step() for those conditions - 2083 // one of which is reaching the specified time target.) It is only 2084 // when CMTask::do_marking_step() returns without setting the 2085 // has_aborted() flag that the marking step has completed. 2086 2087 _task->do_marking_step(1000000000.0 /* something very large */, 2088 true /* do_termination */, 2089 _is_serial); 2090 } while (_task->has_aborted() && !_cm->has_overflown()); 2091 } 2092 }; 2093 2094 // Implementation of AbstractRefProcTaskExecutor for parallel 2095 // reference processing at the end of G1 concurrent marking 2096 2097 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2098 private: 2099 G1CollectedHeap* _g1h; 2100 ConcurrentMark* _cm; 2101 WorkGang* _workers; 2102 uint _active_workers; 2103 2104 public: 2105 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2106 ConcurrentMark* cm, 2107 WorkGang* workers, 2108 uint n_workers) : 2109 _g1h(g1h), _cm(cm), 2110 _workers(workers), _active_workers(n_workers) { } 2111 2112 // Executes the given task using concurrent marking worker threads. 
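  // A rough usage sketch, mirroring weakRefsWork() below: an executor is
  // only handed to the reference processor when processing is
  // multi-threaded; passing NULL instead selects the serial path.
  //
  //   G1CMRefProcTaskExecutor par_task_executor(g1h, this, g1h->workers(), active_workers);
  //   AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
  //   rp->process_discovered_references(&g1_is_alive, &g1_keep_alive,
  //                                     &g1_drain_mark_stack, executor,
  //                                     g1h->gc_timer_cm());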
2113 virtual void execute(ProcessTask& task); 2114 virtual void execute(EnqueueTask& task); 2115 }; 2116 2117 class G1CMRefProcTaskProxy: public AbstractGangTask { 2118 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2119 ProcessTask& _proc_task; 2120 G1CollectedHeap* _g1h; 2121 ConcurrentMark* _cm; 2122 2123 public: 2124 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2125 G1CollectedHeap* g1h, 2126 ConcurrentMark* cm) : 2127 AbstractGangTask("Process reference objects in parallel"), 2128 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2129 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2130 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2131 } 2132 2133 virtual void work(uint worker_id) { 2134 ResourceMark rm; 2135 HandleMark hm; 2136 CMTask* task = _cm->task(worker_id); 2137 G1CMIsAliveClosure g1_is_alive(_g1h); 2138 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2139 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2140 2141 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2142 } 2143 }; 2144 2145 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2146 assert(_workers != NULL, "Need parallel worker threads."); 2147 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2148 2149 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2150 2151 // We need to reset the concurrency level before each 2152 // proxy task execution, so that the termination protocol 2153 // and overflow handling in CMTask::do_marking_step() knows 2154 // how many workers to wait for. 2155 _cm->set_concurrency(_active_workers); 2156 _workers->run_task(&proc_task_proxy); 2157 } 2158 2159 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2160 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2161 EnqueueTask& _enq_task; 2162 2163 public: 2164 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2165 AbstractGangTask("Enqueue reference objects in parallel"), 2166 _enq_task(enq_task) { } 2167 2168 virtual void work(uint worker_id) { 2169 _enq_task.work(worker_id); 2170 } 2171 }; 2172 2173 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2174 assert(_workers != NULL, "Need parallel worker threads."); 2175 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2176 2177 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2178 2179 // Not strictly necessary but... 2180 // 2181 // We need to reset the concurrency level before each 2182 // proxy task execution, so that the termination protocol 2183 // and overflow handling in CMTask::do_marking_step() knows 2184 // how many workers to wait for. 2185 _cm->set_concurrency(_active_workers); 2186 _workers->run_task(&enq_task_proxy); 2187 } 2188 2189 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2190 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2191 } 2192 2193 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2194 if (has_overflown()) { 2195 // Skip processing the discovered references if we have 2196 // overflown the global marking stack. Reference objects 2197 // only get discovered once so it is OK to not 2198 // de-populate the discovered reference lists. We could have, 2199 // but the only benefit would be that, when marking restarts, 2200 // less reference objects are discovered. 
2201 return; 2202 } 2203 2204 ResourceMark rm; 2205 HandleMark hm; 2206 2207 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2208 2209 // Is alive closure. 2210 G1CMIsAliveClosure g1_is_alive(g1h); 2211 2212 // Inner scope to exclude the cleaning of the string and symbol 2213 // tables from the displayed time. 2214 { 2215 G1CMTraceTime t("GC ref-proc", G1Log::finer()); 2216 2217 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2218 2219 // See the comment in G1CollectedHeap::ref_processing_init() 2220 // about how reference processing currently works in G1. 2221 2222 // Set the soft reference policy 2223 rp->setup_policy(clear_all_soft_refs); 2224 assert(_markStack.isEmpty(), "mark stack should be empty"); 2225 2226 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2227 // in serial reference processing. Note these closures are also 2228 // used for serially processing (by the the current thread) the 2229 // JNI references during parallel reference processing. 2230 // 2231 // These closures do not need to synchronize with the worker 2232 // threads involved in parallel reference processing as these 2233 // instances are executed serially by the current thread (e.g. 2234 // reference processing is not multi-threaded and is thus 2235 // performed by the current thread instead of a gang worker). 2236 // 2237 // The gang tasks involved in parallel reference processing create 2238 // their own instances of these closures, which do their own 2239 // synchronization among themselves. 2240 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2241 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2242 2243 // We need at least one active thread. If reference processing 2244 // is not multi-threaded we use the current (VMThread) thread, 2245 // otherwise we use the work gang from the G1CollectedHeap and 2246 // we utilize all the worker threads we can. 2247 bool processing_is_mt = rp->processing_is_mt(); 2248 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2249 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2250 2251 // Parallel processing task executor. 2252 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2253 g1h->workers(), active_workers); 2254 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2255 2256 // Set the concurrency level. The phase was already set prior to 2257 // executing the remark task. 2258 set_concurrency(active_workers); 2259 2260 // Set the degree of MT processing here. If the discovery was done MT, 2261 // the number of threads involved during discovery could differ from 2262 // the number of active workers. This is OK as long as the discovered 2263 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2264 rp->set_active_mt_degree(active_workers); 2265 2266 // Process the weak references. 2267 const ReferenceProcessorStats& stats = 2268 rp->process_discovered_references(&g1_is_alive, 2269 &g1_keep_alive, 2270 &g1_drain_mark_stack, 2271 executor, 2272 g1h->gc_timer_cm()); 2273 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2274 2275 // The do_oop work routines of the keep_alive and drain_marking_stack 2276 // oop closures will set the has_overflown flag if we overflow the 2277 // global marking stack. 
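    // Spelled out, the assert below accepts exactly two outcomes:
    //   !overflow()  ->  the stack was fully drained (isEmpty() holds);
    //    overflow()  ->  the stack contents are no longer reliable and
    //                    marking will have to be restarted.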
2278 2279 assert(_markStack.overflow() || _markStack.isEmpty(), 2280 "mark stack should be empty (unless it overflowed)"); 2281 2282 if (_markStack.overflow()) { 2283 // This should have been done already when we tried to push an 2284 // entry on to the global mark stack. But let's do it again. 2285 set_has_overflown(); 2286 } 2287 2288 assert(rp->num_q() == active_workers, "why not"); 2289 2290 rp->enqueue_discovered_references(executor); 2291 2292 rp->verify_no_references_recorded(); 2293 assert(!rp->discovery_enabled(), "Post condition"); 2294 } 2295 2296 if (has_overflown()) { 2297 // We can not trust g1_is_alive if the marking stack overflowed 2298 return; 2299 } 2300 2301 assert(_markStack.isEmpty(), "Marking should have completed"); 2302 2303 // Unload Klasses, String, Symbols, Code Cache, etc. 2304 { 2305 G1CMTraceTime trace("Unloading", G1Log::finer()); 2306 2307 if (ClassUnloadingWithConcurrentMark) { 2308 bool purged_classes; 2309 2310 { 2311 G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2312 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2313 } 2314 2315 { 2316 G1CMTraceTime trace("Parallel Unloading", G1Log::finest()); 2317 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2318 } 2319 } 2320 2321 if (G1StringDedup::is_enabled()) { 2322 G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2323 G1StringDedup::unlink(&g1_is_alive); 2324 } 2325 } 2326 } 2327 2328 void ConcurrentMark::swapMarkBitMaps() { 2329 CMBitMapRO* temp = _prevMarkBitMap; 2330 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2331 _nextMarkBitMap = (CMBitMap*) temp; 2332 } 2333 2334 // Closure for marking entries in SATB buffers. 2335 class CMSATBBufferClosure : public SATBBufferClosure { 2336 private: 2337 CMTask* _task; 2338 G1CollectedHeap* _g1h; 2339 2340 // This is very similar to CMTask::deal_with_reference, but with 2341 // more relaxed requirements for the argument, so this must be more 2342 // circumspect about treating the argument as an object. 2343 void do_entry(void* entry) const { 2344 _task->increment_refs_reached(); 2345 HeapRegion* hr = _g1h->heap_region_containing_raw(entry); 2346 if (entry < hr->next_top_at_mark_start()) { 2347 // Until we get here, we don't know whether entry refers to a valid 2348 // object; it could instead have been a stale reference. 
2349 oop obj = static_cast<oop>(entry);
2350 assert(obj->is_oop(true /* ignore mark word */),
2351 "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2352 _task->make_reference_grey(obj, hr);
2353 }
2354 }
2355 
2356 public:
2357 CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2358 : _task(task), _g1h(g1h) { }
2359 
2360 virtual void do_buffer(void** buffer, size_t size) {
2361 for (size_t i = 0; i < size; ++i) {
2362 do_entry(buffer[i]);
2363 }
2364 }
2365 };
2366 
2367 class G1RemarkThreadsClosure : public ThreadClosure {
2368 CMSATBBufferClosure _cm_satb_cl;
2369 G1CMOopClosure _cm_cl;
2370 MarkingCodeBlobClosure _code_cl;
2371 int _thread_parity;
2372 
2373 public:
2374 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2375 _cm_satb_cl(task, g1h),
2376 _cm_cl(g1h, g1h->concurrent_mark(), task),
2377 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2378 _thread_parity(Threads::thread_claim_parity()) {}
2379 
2380 void do_thread(Thread* thread) {
2381 if (thread->is_Java_thread()) {
2382 if (thread->claim_oops_do(true, _thread_parity)) {
2383 JavaThread* jt = (JavaThread*)thread;
2384 
2385 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking,
2386 // however oops reachable from nmethods have very complex lifecycles:
2387 // * Alive if on the stack of an executing method
2388 // * Weakly reachable otherwise
2389 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2390 // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2391 jt->nmethods_do(&_code_cl);
2392 
2393 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2394 }
2395 } else if (thread->is_VM_thread()) {
2396 if (thread->claim_oops_do(true, _thread_parity)) {
2397 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2398 }
2399 }
2400 }
2401 };
2402 
2403 class CMRemarkTask: public AbstractGangTask {
2404 private:
2405 ConcurrentMark* _cm;
2406 public:
2407 void work(uint worker_id) {
2408 // Since all available tasks are actually started, we should
2409 // only proceed if we're supposed to be active.
2410 if (worker_id < _cm->active_tasks()) {
2411 CMTask* task = _cm->task(worker_id);
2412 task->record_start_time();
2413 {
2414 ResourceMark rm;
2415 HandleMark hm;
2416 
2417 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2418 Threads::threads_do(&threads_f);
2419 }
2420 
2421 do {
2422 task->do_marking_step(1000000000.0 /* something very large */,
2423 true /* do_termination */,
2424 false /* is_serial */);
2425 } while (task->has_aborted() && !_cm->has_overflown());
2426 // If we overflow, then we do not want to restart. We instead
2427 // want to abort remark and do concurrent marking again.
2428 task->record_end_time(); 2429 } 2430 } 2431 2432 CMRemarkTask(ConcurrentMark* cm, uint active_workers) : 2433 AbstractGangTask("Par Remark"), _cm(cm) { 2434 _cm->terminator()->reset_for_reuse(active_workers); 2435 } 2436 }; 2437 2438 void ConcurrentMark::checkpointRootsFinalWork() { 2439 ResourceMark rm; 2440 HandleMark hm; 2441 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2442 2443 G1CMTraceTime trace("Finalize Marking", G1Log::finer()); 2444 2445 g1h->ensure_parsability(false); 2446 2447 // this is remark, so we'll use up all active threads 2448 uint active_workers = g1h->workers()->active_workers(); 2449 set_concurrency_and_phase(active_workers, false /* concurrent */); 2450 // Leave _parallel_marking_threads at it's 2451 // value originally calculated in the ConcurrentMark 2452 // constructor and pass values of the active workers 2453 // through the gang in the task. 2454 2455 { 2456 StrongRootsScope srs(active_workers); 2457 2458 CMRemarkTask remarkTask(this, active_workers); 2459 // We will start all available threads, even if we decide that the 2460 // active_workers will be fewer. The extra ones will just bail out 2461 // immediately. 2462 g1h->workers()->run_task(&remarkTask); 2463 } 2464 2465 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2466 guarantee(has_overflown() || 2467 satb_mq_set.completed_buffers_num() == 0, 2468 "Invariant: has_overflown = %s, num buffers = %d", 2469 BOOL_TO_STR(has_overflown()), 2470 satb_mq_set.completed_buffers_num()); 2471 2472 print_stats(); 2473 } 2474 2475 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2476 // Note we are overriding the read-only view of the prev map here, via 2477 // the cast. 2478 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2479 } 2480 2481 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2482 _nextMarkBitMap->clearRange(mr); 2483 } 2484 2485 HeapRegion* 2486 ConcurrentMark::claim_region(uint worker_id) { 2487 // "checkpoint" the finger 2488 HeapWord* finger = _finger; 2489 2490 // _heap_end will not change underneath our feet; it only changes at 2491 // yield points. 2492 while (finger < _heap_end) { 2493 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2494 2495 // Note on how this code handles humongous regions. In the 2496 // normal case the finger will reach the start of a "starts 2497 // humongous" (SH) region. Its end will either be the end of the 2498 // last "continues humongous" (CH) region in the sequence, or the 2499 // standard end of the SH region (if the SH is the only region in 2500 // the sequence). That way claim_region() will skip over the CH 2501 // regions. However, there is a subtle race between a CM thread 2502 // executing this method and a mutator thread doing a humongous 2503 // object allocation. The two are not mutually exclusive as the CM 2504 // thread does not need to hold the Heap_lock when it gets 2505 // here. So there is a chance that claim_region() will come across 2506 // a free region that's in the progress of becoming a SH or a CH 2507 // region. In the former case, it will either 2508 // a) Miss the update to the region's end, in which case it will 2509 // visit every subsequent CH region, will find their bitmaps 2510 // empty, and do nothing, or 2511 // b) Will observe the update of the region's end (in which case 2512 // it will skip the subsequent CH regions). 2513 // If it comes across a region that suddenly becomes CH, the 2514 // scenario will be similar to b). 
So, the race between 2515 // claim_region() and a humongous object allocation might force us 2516 // to do a bit of unnecessary work (due to some unnecessary bitmap 2517 // iterations) but it should not introduce and correctness issues. 2518 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 2519 2520 // Above heap_region_containing_raw may return NULL as we always scan claim 2521 // until the end of the heap. In this case, just jump to the next region. 2522 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 2523 2524 // Is the gap between reading the finger and doing the CAS too long? 2525 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2526 if (res == finger && curr_region != NULL) { 2527 // we succeeded 2528 HeapWord* bottom = curr_region->bottom(); 2529 HeapWord* limit = curr_region->next_top_at_mark_start(); 2530 2531 // notice that _finger == end cannot be guaranteed here since, 2532 // someone else might have moved the finger even further 2533 assert(_finger >= end, "the finger should have moved forward"); 2534 2535 if (limit > bottom) { 2536 return curr_region; 2537 } else { 2538 assert(limit == bottom, 2539 "the region limit should be at bottom"); 2540 // we return NULL and the caller should try calling 2541 // claim_region() again. 2542 return NULL; 2543 } 2544 } else { 2545 assert(_finger > finger, "the finger should have moved forward"); 2546 // read it again 2547 finger = _finger; 2548 } 2549 } 2550 2551 return NULL; 2552 } 2553 2554 #ifndef PRODUCT 2555 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2556 private: 2557 G1CollectedHeap* _g1h; 2558 const char* _phase; 2559 int _info; 2560 2561 public: 2562 VerifyNoCSetOops(const char* phase, int info = -1) : 2563 _g1h(G1CollectedHeap::heap()), 2564 _phase(phase), 2565 _info(info) 2566 { } 2567 2568 void operator()(oop obj) const { 2569 guarantee(obj->is_oop(), 2570 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2571 p2i(obj), _phase, _info); 2572 guarantee(!_g1h->obj_in_cs(obj), 2573 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2574 p2i(obj), _phase, _info); 2575 } 2576 }; 2577 2578 void ConcurrentMark::verify_no_cset_oops() { 2579 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2580 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2581 return; 2582 } 2583 2584 // Verify entries on the global mark stack 2585 _markStack.iterate(VerifyNoCSetOops("Stack")); 2586 2587 // Verify entries on the task queues 2588 for (uint i = 0; i < _max_worker_id; ++i) { 2589 CMTaskQueue* queue = _task_queues->queue(i); 2590 queue->iterate(VerifyNoCSetOops("Queue", i)); 2591 } 2592 2593 // Verify the global finger 2594 HeapWord* global_finger = finger(); 2595 if (global_finger != NULL && global_finger < _heap_end) { 2596 // The global finger always points to a heap region boundary. We 2597 // use heap_region_containing_raw() to get the containing region 2598 // given that the global finger could be pointing to a free region 2599 // which subsequently becomes continues humongous. If that 2600 // happens, heap_region_containing() will return the bottom of the 2601 // corresponding starts humongous region and the check below will 2602 // not hold any more. 2603 // Since we always iterate over all regions, we might get a NULL HeapRegion 2604 // here. 
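    // The guarantee below thus accepts exactly two cases:
    //   global_hr == NULL                     - the finger points into an
    //                                           uncommitted part of the heap;
    //   global_finger == global_hr->bottom()  - the finger sits on a heap
    //                                           region boundary, as expected.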
2605 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2606 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2607 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2608 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2609 } 2610 2611 // Verify the task fingers 2612 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2613 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2614 CMTask* task = _tasks[i]; 2615 HeapWord* task_finger = task->finger(); 2616 if (task_finger != NULL && task_finger < _heap_end) { 2617 // See above note on the global finger verification. 2618 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2619 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2620 !task_hr->in_collection_set(), 2621 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2622 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2623 } 2624 } 2625 } 2626 #endif // PRODUCT 2627 2628 // Aggregate the counting data that was constructed concurrently 2629 // with marking. 2630 class AggregateCountDataHRClosure: public HeapRegionClosure { 2631 G1CollectedHeap* _g1h; 2632 ConcurrentMark* _cm; 2633 CardTableModRefBS* _ct_bs; 2634 BitMap* _cm_card_bm; 2635 uint _max_worker_id; 2636 2637 public: 2638 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2639 BitMap* cm_card_bm, 2640 uint max_worker_id) : 2641 _g1h(g1h), _cm(g1h->concurrent_mark()), 2642 _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())), 2643 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 2644 2645 bool doHeapRegion(HeapRegion* hr) { 2646 if (hr->is_continues_humongous()) { 2647 // We will ignore these here and process them when their 2648 // associated "starts humongous" region is processed. 2649 // Note that we cannot rely on their associated 2650 // "starts humongous" region to have their bit set to 1 2651 // since, due to the region chunking in the parallel region 2652 // iteration, a "continues humongous" region might be visited 2653 // before its associated "starts humongous". 2654 return false; 2655 } 2656 2657 HeapWord* start = hr->bottom(); 2658 HeapWord* limit = hr->next_top_at_mark_start(); 2659 HeapWord* end = hr->end(); 2660 2661 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2662 "Preconditions not met - " 2663 "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", " 2664 "top: " PTR_FORMAT ", end: " PTR_FORMAT, 2665 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())); 2666 2667 assert(hr->next_marked_bytes() == 0, "Precondition"); 2668 2669 if (start == limit) { 2670 // NTAMS of this region has not been set so nothing to do. 2671 return false; 2672 } 2673 2674 // 'start' should be in the heap. 2675 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 2676 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 2677 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 2678 2679 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 2680 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 2681 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 2682 2683 // If ntams is not card aligned then we bump card bitmap index 2684 // for limit so that we get the all the cards spanned by 2685 // the object ending at ntams. 
2686 // Note: if this is the last region in the heap then ntams 2687 // could be actually just beyond the end of the the heap; 2688 // limit_idx will then correspond to a (non-existent) card 2689 // that is also outside the heap. 2690 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 2691 limit_idx += 1; 2692 } 2693 2694 assert(limit_idx <= end_idx, "or else use atomics"); 2695 2696 // Aggregate the "stripe" in the count data associated with hr. 2697 uint hrm_index = hr->hrm_index(); 2698 size_t marked_bytes = 0; 2699 2700 for (uint i = 0; i < _max_worker_id; i += 1) { 2701 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 2702 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 2703 2704 // Fetch the marked_bytes in this region for task i and 2705 // add it to the running total for this region. 2706 marked_bytes += marked_bytes_array[hrm_index]; 2707 2708 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 2709 // into the global card bitmap. 2710 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 2711 2712 while (scan_idx < limit_idx) { 2713 assert(task_card_bm->at(scan_idx) == true, "should be"); 2714 _cm_card_bm->set_bit(scan_idx); 2715 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 2716 2717 // BitMap::get_next_one_offset() can handle the case when 2718 // its left_offset parameter is greater than its right_offset 2719 // parameter. It does, however, have an early exit if 2720 // left_offset == right_offset. So let's limit the value 2721 // passed in for left offset here. 2722 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2723 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2724 } 2725 } 2726 2727 // Update the marked bytes for this region. 2728 hr->add_to_marked_bytes(marked_bytes); 2729 2730 // Next heap region 2731 return false; 2732 } 2733 }; 2734 2735 class G1AggregateCountDataTask: public AbstractGangTask { 2736 protected: 2737 G1CollectedHeap* _g1h; 2738 ConcurrentMark* _cm; 2739 BitMap* _cm_card_bm; 2740 uint _max_worker_id; 2741 uint _active_workers; 2742 HeapRegionClaimer _hrclaimer; 2743 2744 public: 2745 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2746 ConcurrentMark* cm, 2747 BitMap* cm_card_bm, 2748 uint max_worker_id, 2749 uint n_workers) : 2750 AbstractGangTask("Count Aggregation"), 2751 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2752 _max_worker_id(max_worker_id), 2753 _active_workers(n_workers), 2754 _hrclaimer(_active_workers) { 2755 } 2756 2757 void work(uint worker_id) { 2758 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2759 2760 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2761 } 2762 }; 2763 2764 2765 void ConcurrentMark::aggregate_count_data() { 2766 uint n_workers = _g1h->workers()->active_workers(); 2767 2768 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2769 _max_worker_id, n_workers); 2770 2771 _g1h->workers()->run_task(&g1_par_agg_task); 2772 } 2773 2774 // Clear the per-worker arrays used to store the per-region counting data 2775 void ConcurrentMark::clear_all_count_data() { 2776 // Clear the global card bitmap - it will be filled during 2777 // liveness count aggregation (during remark) and the 2778 // final counting task. 2779 _card_bm.clear(); 2780 2781 // Clear the global region bitmap - it will be filled as part 2782 // of the final counting task. 
2783 _region_bm.clear(); 2784 2785 uint max_regions = _g1h->max_regions(); 2786 assert(_max_worker_id > 0, "uninitialized"); 2787 2788 for (uint i = 0; i < _max_worker_id; i += 1) { 2789 BitMap* task_card_bm = count_card_bitmap_for(i); 2790 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2791 2792 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2793 assert(marked_bytes_array != NULL, "uninitialized"); 2794 2795 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2796 task_card_bm->clear(); 2797 } 2798 } 2799 2800 void ConcurrentMark::print_stats() { 2801 if (G1MarkingVerboseLevel > 0) { 2802 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2803 for (size_t i = 0; i < _active_tasks; ++i) { 2804 _tasks[i]->print_stats(); 2805 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 2806 } 2807 } 2808 } 2809 2810 // abandon current marking iteration due to a Full GC 2811 void ConcurrentMark::abort() { 2812 if (!cmThread()->during_cycle() || _has_aborted) { 2813 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2814 return; 2815 } 2816 2817 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2818 // concurrent bitmap clearing. 2819 _nextMarkBitMap->clearAll(); 2820 2821 // Note we cannot clear the previous marking bitmap here 2822 // since VerifyDuringGC verifies the objects marked during 2823 // a full GC against the previous bitmap. 2824 2825 // Clear the liveness counting data 2826 clear_all_count_data(); 2827 // Empty mark stack 2828 reset_marking_state(); 2829 for (uint i = 0; i < _max_worker_id; ++i) { 2830 _tasks[i]->clear_region_fields(); 2831 } 2832 _first_overflow_barrier_sync.abort(); 2833 _second_overflow_barrier_sync.abort(); 2834 _has_aborted = true; 2835 2836 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2837 satb_mq_set.abandon_partial_marking(); 2838 // This can be called either during or outside marking, we'll read 2839 // the expected_active value from the SATB queue set. 2840 satb_mq_set.set_active_all_threads( 2841 false, /* new active value */ 2842 satb_mq_set.is_active() /* expected_active */); 2843 2844 _g1h->trace_heap_after_concurrent_cycle(); 2845 _g1h->register_concurrent_cycle_end(); 2846 } 2847 2848 static void print_ms_time_info(const char* prefix, const char* name, 2849 NumberSeq& ns) { 2850 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2851 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2852 if (ns.num() > 0) { 2853 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2854 prefix, ns.sd(), ns.maximum()); 2855 } 2856 } 2857 2858 void ConcurrentMark::print_summary_info() { 2859 gclog_or_tty->print_cr(" Concurrent marking:"); 2860 print_ms_time_info(" ", "init marks", _init_times); 2861 print_ms_time_info(" ", "remarks", _remark_times); 2862 { 2863 print_ms_time_info(" ", "final marks", _remark_mark_times); 2864 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2865 2866 } 2867 print_ms_time_info(" ", "cleanups", _cleanup_times); 2868 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2869 _total_counting_time, 2870 (_cleanup_times.num() > 0 ? 
_total_counting_time * 1000.0 / 2871 (double)_cleanup_times.num() 2872 : 0.0)); 2873 if (G1ScrubRemSets) { 2874 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2875 _total_rs_scrub_time, 2876 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 2877 (double)_cleanup_times.num() 2878 : 0.0)); 2879 } 2880 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 2881 (_init_times.sum() + _remark_times.sum() + 2882 _cleanup_times.sum())/1000.0); 2883 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 2884 "(%8.2f s marking).", 2885 cmThread()->vtime_accum(), 2886 cmThread()->vtime_mark_accum()); 2887 } 2888 2889 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2890 _parallel_workers->print_worker_threads_on(st); 2891 } 2892 2893 void ConcurrentMark::print_on_error(outputStream* st) const { 2894 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2895 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2896 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2897 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2898 } 2899 2900 // We take a break if someone is trying to stop the world. 2901 bool ConcurrentMark::do_yield_check(uint worker_id) { 2902 if (SuspendibleThreadSet::should_yield()) { 2903 if (worker_id == 0) { 2904 _g1h->g1_policy()->record_concurrent_pause(); 2905 } 2906 SuspendibleThreadSet::yield(); 2907 return true; 2908 } else { 2909 return false; 2910 } 2911 } 2912 2913 // Closure for iteration over bitmaps 2914 class CMBitMapClosure : public BitMapClosure { 2915 private: 2916 // the bitmap that is being iterated over 2917 CMBitMap* _nextMarkBitMap; 2918 ConcurrentMark* _cm; 2919 CMTask* _task; 2920 2921 public: 2922 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 2923 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2924 2925 bool do_bit(size_t offset) { 2926 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2927 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2928 assert( addr < _cm->finger(), "invariant"); 2929 assert(addr >= _task->finger(), "invariant"); 2930 2931 // We move that task's local finger along. 
2932 _task->move_finger_to(addr); 2933 2934 _task->scan_object(oop(addr)); 2935 // we only partially drain the local queue and global stack 2936 _task->drain_local_queue(true); 2937 _task->drain_global_stack(true); 2938 2939 // if the has_aborted flag has been raised, we need to bail out of 2940 // the iteration 2941 return !_task->has_aborted(); 2942 } 2943 }; 2944 2945 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2946 ReferenceProcessor* result = NULL; 2947 if (G1UseConcMarkReferenceProcessing) { 2948 result = g1h->ref_processor_cm(); 2949 assert(result != NULL, "should not be NULL"); 2950 } 2951 return result; 2952 } 2953 2954 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2955 ConcurrentMark* cm, 2956 CMTask* task) 2957 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2958 _g1h(g1h), _cm(cm), _task(task) 2959 { } 2960 2961 void CMTask::setup_for_region(HeapRegion* hr) { 2962 assert(hr != NULL, 2963 "claim_region() should have filtered out NULL regions"); 2964 assert(!hr->is_continues_humongous(), 2965 "claim_region() should have filtered out continues humongous regions"); 2966 _curr_region = hr; 2967 _finger = hr->bottom(); 2968 update_region_limit(); 2969 } 2970 2971 void CMTask::update_region_limit() { 2972 HeapRegion* hr = _curr_region; 2973 HeapWord* bottom = hr->bottom(); 2974 HeapWord* limit = hr->next_top_at_mark_start(); 2975 2976 if (limit == bottom) { 2977 // The region was collected underneath our feet. 2978 // We set the finger to bottom to ensure that the bitmap 2979 // iteration that will follow this will not do anything. 2980 // (this is not a condition that holds when we set the region up, 2981 // as the region is not supposed to be empty in the first place) 2982 _finger = bottom; 2983 } else if (limit >= _region_limit) { 2984 assert(limit >= _finger, "peace of mind"); 2985 } else { 2986 assert(limit < _region_limit, "only way to get here"); 2987 // This can happen under some pretty unusual circumstances. An 2988 // evacuation pause empties the region underneath our feet (NTAMS 2989 // at bottom). We then do some allocation in the region (NTAMS 2990 // stays at bottom), followed by the region being used as a GC 2991 // alloc region (NTAMS will move to top() and the objects 2992 // originally below it will be grayed). All objects now marked in 2993 // the region are explicitly grayed, if below the global finger, 2994 // and we do not need in fact to scan anything else. So, we simply 2995 // set _finger to be limit to ensure that the bitmap iteration 2996 // doesn't do anything. 2997 _finger = limit; 2998 } 2999 3000 _region_limit = limit; 3001 } 3002 3003 void CMTask::giveup_current_region() { 3004 assert(_curr_region != NULL, "invariant"); 3005 clear_region_fields(); 3006 } 3007 3008 void CMTask::clear_region_fields() { 3009 // Values for these three fields that indicate that we're not 3010 // holding on to a region. 
3011 _curr_region = NULL;
3012 _finger = NULL;
3013 _region_limit = NULL;
3014 }
3015 
3016 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3017 if (cm_oop_closure == NULL) {
3018 assert(_cm_oop_closure != NULL, "invariant");
3019 } else {
3020 assert(_cm_oop_closure == NULL, "invariant");
3021 }
3022 _cm_oop_closure = cm_oop_closure;
3023 }
3024 
3025 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3026 guarantee(nextMarkBitMap != NULL, "invariant");
3027 _nextMarkBitMap = nextMarkBitMap;
3028 clear_region_fields();
3029 
3030 _calls = 0;
3031 _elapsed_time_ms = 0.0;
3032 _termination_time_ms = 0.0;
3033 _termination_start_time_ms = 0.0;
3034 }
3035 
3036 bool CMTask::should_exit_termination() {
3037 regular_clock_call();
3038 // This is called when we are in the termination protocol. We should
3039 // quit if, for some reason, this task wants to abort or the global
3040 // stack is not empty (this means that we can get work from it).
3041 return !_cm->mark_stack_empty() || has_aborted();
3042 }
3043 
3044 void CMTask::reached_limit() {
3045 assert(_words_scanned >= _words_scanned_limit ||
3046 _refs_reached >= _refs_reached_limit ,
3047 "shouldn't have been called otherwise");
3048 regular_clock_call();
3049 }
3050 
3051 void CMTask::regular_clock_call() {
3052 if (has_aborted()) return;
3053 
3054 // First, we need to recalculate the words scanned and refs reached
3055 // limits for the next clock call.
3056 recalculate_limits();
3057 
3058 // During the regular clock call we do the following:
3059 
3060 // (1) If an overflow has been flagged, then we abort.
3061 if (_cm->has_overflown()) {
3062 set_has_aborted();
3063 return;
3064 }
3065 
3066 // If we are not concurrent (i.e. we're doing remark) we don't need
3067 // to check anything else. The other steps are only needed during
3068 // the concurrent marking phase.
3069 if (!concurrent()) return;
3070 
3071 // (2) If marking has been aborted for Full GC, then we also abort.
3072 if (_cm->has_aborted()) {
3073 set_has_aborted();
3074 return;
3075 }
3076 
3077 double curr_time_ms = os::elapsedVTime() * 1000.0;
3078 
3079 // (3) We check whether we should yield. If we have to, then we abort.
3080 if (SuspendibleThreadSet::should_yield()) {
3081 // We should yield. To do this we abort the task. The caller is
3082 // responsible for yielding.
3083 set_has_aborted();
3084 return;
3085 }
3086 
3087 // (4) We check whether we've reached our time quota. If we have,
3088 // then we abort.
3089 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3090 if (elapsed_time_ms > _time_target_ms) {
3091 set_has_aborted();
3092 _has_timed_out = true;
3093 return;
3094 }
3095 
3096 // (5) Finally, we check whether there are enough completed SATB
3097 // buffers available for processing. If there are, we abort.
3098 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3099 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3100 // we do need to process SATB buffers, we'll abort and restart 3101 // the marking task to do so 3102 set_has_aborted(); 3103 return; 3104 } 3105 } 3106 3107 void CMTask::recalculate_limits() { 3108 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3109 _words_scanned_limit = _real_words_scanned_limit; 3110 3111 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3112 _refs_reached_limit = _real_refs_reached_limit; 3113 } 3114 3115 void CMTask::decrease_limits() { 3116 // This is called when we believe that we're going to do an infrequent 3117 // operation which will increase the per byte scanned cost (i.e. move 3118 // entries to/from the global stack). It basically tries to decrease the 3119 // scanning limit so that the clock is called earlier. 3120 3121 _words_scanned_limit = _real_words_scanned_limit - 3122 3 * words_scanned_period / 4; 3123 _refs_reached_limit = _real_refs_reached_limit - 3124 3 * refs_reached_period / 4; 3125 } 3126 3127 void CMTask::move_entries_to_global_stack() { 3128 // local array where we'll store the entries that will be popped 3129 // from the local queue 3130 oop buffer[global_stack_transfer_size]; 3131 3132 int n = 0; 3133 oop obj; 3134 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3135 buffer[n] = obj; 3136 ++n; 3137 } 3138 3139 if (n > 0) { 3140 // we popped at least one entry from the local queue 3141 3142 if (!_cm->mark_stack_push(buffer, n)) { 3143 set_has_aborted(); 3144 } 3145 } 3146 3147 // this operation was quite expensive, so decrease the limits 3148 decrease_limits(); 3149 } 3150 3151 void CMTask::get_entries_from_global_stack() { 3152 // local array where we'll store the entries that will be popped 3153 // from the global stack. 3154 oop buffer[global_stack_transfer_size]; 3155 int n; 3156 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3157 assert(n <= global_stack_transfer_size, 3158 "we should not pop more than the given limit"); 3159 if (n > 0) { 3160 // yes, we did actually pop at least one entry 3161 for (int i = 0; i < n; ++i) { 3162 bool success = _task_queue->push(buffer[i]); 3163 // We only call this when the local queue is empty or under a 3164 // given target limit. So, we do not expect this push to fail. 3165 assert(success, "invariant"); 3166 } 3167 } 3168 3169 // this operation was quite expensive, so decrease the limits 3170 decrease_limits(); 3171 } 3172 3173 void CMTask::drain_local_queue(bool partially) { 3174 if (has_aborted()) return; 3175 3176 // Decide what the target size is, depending whether we're going to 3177 // drain it partially (so that other tasks can steal if they run out 3178 // of things to do) or totally (at the very end). 
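  // For example (illustrative numbers only): with a task queue capacity of
  // 16384 entries and GCDrainStackTargetSize at 64, a partial drain stops
  // once the queue is down to MIN2(16384 / 3, 64) = 64 entries, whereas a
  // total drain (partially == false) uses a target of zero and empties it.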
3179 size_t target_size;
3180 if (partially) {
3181 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3182 } else {
3183 target_size = 0;
3184 }
3185
3186 if (_task_queue->size() > target_size) {
3187 oop obj;
3188 bool ret = _task_queue->pop_local(obj);
3189 while (ret) {
3190 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3191 assert(!_g1h->is_on_master_free_list(
3192 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3193
3194 scan_object(obj);
3195
3196 if (_task_queue->size() <= target_size || has_aborted()) {
3197 ret = false;
3198 } else {
3199 ret = _task_queue->pop_local(obj);
3200 }
3201 }
3202 }
3203 }
3204
3205 void CMTask::drain_global_stack(bool partially) {
3206 if (has_aborted()) return;
3207
3208 // We have a policy to drain the local queue before we attempt to
3209 // drain the global stack.
3210 assert(partially || _task_queue->size() == 0, "invariant");
3211
3212 // Decide what the target size is, depending on whether we're going to
3213 // drain it partially (so that other tasks can steal if they run out
3214 // of things to do) or totally (at the very end). Notice that,
3215 // because we move entries from the global stack in chunks or
3216 // because another task might be doing the same, we might in fact
3217 // drop below the target. But, this is not a problem.
3218 size_t target_size;
3219 if (partially) {
3220 target_size = _cm->partial_mark_stack_size_target();
3221 } else {
3222 target_size = 0;
3223 }
3224
3225 if (_cm->mark_stack_size() > target_size) {
3226 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3227 get_entries_from_global_stack();
3228 drain_local_queue(partially);
3229 }
3230 }
3231 }
3232
3233 // The SATB queue code has several assumptions about whether to call the par
3234 // or non-par versions of the methods. This is why some of the code is
3235 // replicated. We should really get rid of the single-threaded version
3236 // of the code to simplify things.
3237 void CMTask::drain_satb_buffers() {
3238 if (has_aborted()) return;
3239
3240 // We set this so that the regular clock knows that we're in the
3241 // middle of draining buffers and doesn't set the abort flag when it
3242 // notices that SATB buffers are available for draining. It'd be
3243 // very counterproductive if it did that. :-)
3244 _draining_satb_buffers = true;
3245
3246 CMSATBBufferClosure satb_cl(this, _g1h);
3247 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3248
3249 // This keeps claiming and applying the closure to completed buffers
3250 // until we run out of buffers or we need to abort.
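// Note that apply_closure_to_completed_buffer() claims and processes the
// completed buffers one at a time, so the regular clock gets a chance to run
// (and possibly abort us) between buffers.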
3251 while (!has_aborted() && 3252 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 3253 regular_clock_call(); 3254 } 3255 3256 _draining_satb_buffers = false; 3257 3258 assert(has_aborted() || 3259 concurrent() || 3260 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3261 3262 // again, this was a potentially expensive operation, decrease the 3263 // limits to get the regular clock call early 3264 decrease_limits(); 3265 } 3266 3267 void CMTask::print_stats() { 3268 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 3269 _worker_id, _calls); 3270 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3271 _elapsed_time_ms, _termination_time_ms); 3272 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3273 _step_times_ms.num(), _step_times_ms.avg(), 3274 _step_times_ms.sd()); 3275 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3276 _step_times_ms.maximum(), _step_times_ms.sum()); 3277 } 3278 3279 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) { 3280 return _task_queues->steal(worker_id, hash_seed, obj); 3281 } 3282 3283 /***************************************************************************** 3284 3285 The do_marking_step(time_target_ms, ...) method is the building 3286 block of the parallel marking framework. It can be called in parallel 3287 with other invocations of do_marking_step() on different tasks 3288 (but only one per task, obviously) and concurrently with the 3289 mutator threads, or during remark, hence it eliminates the need 3290 for two versions of the code. When called during remark, it will 3291 pick up from where the task left off during the concurrent marking 3292 phase. Interestingly, tasks are also claimable during evacuation 3293 pauses too, since do_marking_step() ensures that it aborts before 3294 it needs to yield. 3295 3296 The data structures that it uses to do marking work are the 3297 following: 3298 3299 (1) Marking Bitmap. If there are gray objects that appear only 3300 on the bitmap (this happens either when dealing with an overflow 3301 or when the initial marking phase has simply marked the roots 3302 and didn't push them on the stack), then tasks claim heap 3303 regions whose bitmap they then scan to find gray objects. A 3304 global finger indicates where the end of the last claimed region 3305 is. A local finger indicates how far into the region a task has 3306 scanned. The two fingers are used to determine how to gray an 3307 object (i.e. whether simply marking it is OK, as it will be 3308 visited by a task in the future, or whether it needs to be also 3309 pushed on a stack). 3310 3311 (2) Local Queue. The local queue of the task which is accessed 3312 reasonably efficiently by the task. Other tasks can steal from 3313 it when they run out of work. Throughout the marking phase, a 3314 task attempts to keep its local queue short but not totally 3315 empty, so that entries are available for stealing by other 3316 tasks. Only when there is no more work, a task will totally 3317 drain its local queue. 3318 3319 (3) Global Mark Stack. This handles local queue overflow. During 3320 marking only sets of entries are moved between it and the local 3321 queues, as access to it requires a mutex and more fine-grain 3322 interaction with it which might cause contention. If it 3323 overflows, then the marking phase should restart and iterate 3324 over the bitmap to identify gray objects. 
Throughout the marking
3325 phase, tasks attempt to keep the global mark stack at a small
3326 length but not totally empty, so that entries are available for
3327 popping by other tasks. Only when there is no more work, tasks
3328 will totally drain the global mark stack.
3329
3330 (4) SATB Buffer Queue. This is where completed SATB buffers are
3331 made available. Buffers are regularly removed from this queue
3332 and scanned for roots, so that the queue doesn't get too
3333 long. During remark, all completed buffers are processed, as
3334 well as the filled in parts of any uncompleted buffers.
3335
3336 The do_marking_step() method tries to abort when the time target
3337 has been reached. There are a few other cases when the
3338 do_marking_step() method also aborts:
3339
3340 (1) When the marking phase has been aborted (after a Full GC).
3341
3342 (2) When a global overflow (on the global stack) has been
3343 triggered. Before the task aborts, it will actually sync up with
3344 the other tasks to ensure that all the marking data structures
3345 (local queues, stacks, fingers etc.) are re-initialized so that
3346 when do_marking_step() completes, the marking phase can
3347 immediately restart.
3348
3349 (3) When enough completed SATB buffers are available. The
3350 do_marking_step() method only tries to drain SATB buffers right
3351 at the beginning. So, if enough buffers are available, the
3352 marking step aborts and the SATB buffers are processed at
3353 the beginning of the next invocation.
3354
3355 (4) To yield. When we have to yield, we abort and yield
3356 right at the end of do_marking_step(). This saves us from a lot
3357 of hassle as, by yielding, we might allow a Full GC. If this
3358 happens then objects will be compacted underneath our feet, the
3359 heap might shrink, etc. We save checking for this by just
3360 aborting and doing the yield right at the end.
3361
3362 From the above it follows that the do_marking_step() method should
3363 be called in a loop (or, otherwise, regularly) until it completes.
3364
3365 If a marking step completes without its has_aborted() flag being
3366 true, it means it has completed the current marking phase (and
3367 also all other marking tasks have done so and have all synced up).
3368
3369 A method called regular_clock_call() is invoked "regularly" (in
3370 sub-ms intervals) throughout marking. It is this clock method that
3371 checks all the abort conditions which were mentioned above and
3372 decides when the task should abort. A work-based scheme is used to
3373 trigger this clock method: when the number of object words the
3374 marking phase has scanned or the number of references the marking
3375 phase has visited reaches a given limit. Additional invocations of
3376 the clock method have been planted in a few other strategic places
3377 too. The initial reason for the clock method was to avoid calling
3378 vtime too regularly, as it is quite expensive. So, once it was in
3379 place, it was natural to piggy-back all the other conditions on it
3380 too and not constantly check them throughout the code.
3381
3382 If do_termination is true then do_marking_step will enter its
3383 termination protocol.
3384
3385 The value of is_serial must be true when do_marking_step is being
3386 called serially (i.e. by the VMThread) and do_marking_step should
3387 skip any synchronization in the termination and overflow code.
3388 Examples include the serial remark code and the serial reference
3389 processing closures.
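    As a rough illustration only (the names below are simplified and this
    is not the exact caller code), a serial caller typically drives a task
    like this:

      do {
        task->do_marking_step(1000000000.0,   // effectively no time limit
                              true,           // do_termination
                              true);          // is_serial
      } while (task->has_aborted() && !cm->has_overflown());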
3390
3391 The value of is_serial must be false when do_marking_step is
3392 being called by any of the worker threads in a work gang.
3393 Examples include the concurrent marking code (CMMarkingTask),
3394 the MT remark code, and the MT reference processing closures.
3395
3396 *****************************************************************************/
3397
3398 void CMTask::do_marking_step(double time_target_ms,
3399 bool do_termination,
3400 bool is_serial) {
3401 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3402 assert(concurrent() == _cm->concurrent(), "they should be the same");
3403
3404 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3405 assert(_task_queues != NULL, "invariant");
3406 assert(_task_queue != NULL, "invariant");
3407 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3408
3409 assert(!_claimed,
3410 "only one thread should claim this task at any one time");
3411
3412 // OK, this doesn't safeguard against all possible scenarios, as it is
3413 // possible for two threads to set the _claimed flag at the same
3414 // time. But it is only for debugging purposes anyway and it will
3415 // catch most problems.
3416 _claimed = true;
3417
3418 _start_time_ms = os::elapsedVTime() * 1000.0;
3419
3420 // If do_stealing is true then do_marking_step will attempt to
3421 // steal work from the other CMTasks. It only makes sense to
3422 // enable stealing when the termination protocol is enabled
3423 // and do_marking_step() is not being called serially.
3424 bool do_stealing = do_termination && !is_serial;
3425
3426 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3427 _time_target_ms = time_target_ms - diff_prediction_ms;
3428
3429 // set up the variables that are used in the work-based scheme to
3430 // call the regular clock method
3431 _words_scanned = 0;
3432 _refs_reached = 0;
3433 recalculate_limits();
3434
3435 // clear all flags
3436 clear_has_aborted();
3437 _has_timed_out = false;
3438 _draining_satb_buffers = false;
3439
3440 ++_calls;
3441
3442 // Set up the bitmap and oop closures. Anything that uses them is
3443 // eventually called from this method, so it is OK to allocate these
3444 // statically.
3445 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3446 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
3447 set_cm_oop_closure(&cm_oop_closure);
3448
3449 if (_cm->has_overflown()) {
3450 // This can happen if the mark stack overflows during a GC pause
3451 // and this task, after a yield point, restarts. We have to abort
3452 // as we need to get into the overflow protocol which happens
3453 // right at the end of this task.
3454 set_has_aborted();
3455 }
3456
3457 // First drain any available SATB buffers. After this, we will not
3458 // look at SATB buffers before the next invocation of this method.
3459 // If enough completed SATB buffers are queued up, the regular clock
3460 // will abort this task so that it restarts.
3461 drain_satb_buffers();
3462 // ...then partially drain the local queue and the global stack
3463 drain_local_queue(true);
3464 drain_global_stack(true);
3465
3466 do {
3467 if (!has_aborted() && _curr_region != NULL) {
3468 // This means that we're already holding on to a region.
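// (Either we claimed it in a previous iteration of the outer do-loop, or it
// was left over from a previous invocation of this method that aborted
// part-way through scanning it.)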
3469 assert(_finger != NULL, "if region is not NULL, then the finger " 3470 "should not be NULL either"); 3471 3472 // We might have restarted this task after an evacuation pause 3473 // which might have evacuated the region we're holding on to 3474 // underneath our feet. Let's read its limit again to make sure 3475 // that we do not iterate over a region of the heap that 3476 // contains garbage (update_region_limit() will also move 3477 // _finger to the start of the region if it is found empty). 3478 update_region_limit(); 3479 // We will start from _finger not from the start of the region, 3480 // as we might be restarting this task after aborting half-way 3481 // through scanning this region. In this case, _finger points to 3482 // the address where we last found a marked object. If this is a 3483 // fresh region, _finger points to start(). 3484 MemRegion mr = MemRegion(_finger, _region_limit); 3485 3486 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 3487 "humongous regions should go around loop once only"); 3488 3489 // Some special cases: 3490 // If the memory region is empty, we can just give up the region. 3491 // If the current region is humongous then we only need to check 3492 // the bitmap for the bit associated with the start of the object, 3493 // scan the object if it's live, and give up the region. 3494 // Otherwise, let's iterate over the bitmap of the part of the region 3495 // that is left. 3496 // If the iteration is successful, give up the region. 3497 if (mr.is_empty()) { 3498 giveup_current_region(); 3499 regular_clock_call(); 3500 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 3501 if (_nextMarkBitMap->isMarked(mr.start())) { 3502 // The object is marked - apply the closure 3503 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 3504 bitmap_closure.do_bit(offset); 3505 } 3506 // Even if this task aborted while scanning the humongous object 3507 // we can (and should) give up the current region. 3508 giveup_current_region(); 3509 regular_clock_call(); 3510 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 3511 giveup_current_region(); 3512 regular_clock_call(); 3513 } else { 3514 assert(has_aborted(), "currently the only way to do so"); 3515 // The only way to abort the bitmap iteration is to return 3516 // false from the do_bit() method. However, inside the 3517 // do_bit() method we move the _finger to point to the 3518 // object currently being looked at. So, if we bail out, we 3519 // have definitely set _finger to something non-null. 3520 assert(_finger != NULL, "invariant"); 3521 3522 // Region iteration was actually aborted. So now _finger 3523 // points to the address of the object we last scanned. If we 3524 // leave it there, when we restart this task, we will rescan 3525 // the object. It is easy to avoid this. We move the finger by 3526 // enough to point to the next possible object header (the 3527 // bitmap knows by how much we need to move it as it knows its 3528 // granularity). 3529 assert(_finger < _region_limit, "invariant"); 3530 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 3531 // Check if bitmap iteration was aborted while scanning the last object 3532 if (new_finger >= _region_limit) { 3533 giveup_current_region(); 3534 } else { 3535 move_finger_to(new_finger); 3536 } 3537 } 3538 } 3539 // At this point we have either completed iterating over the 3540 // region we were holding on to, or we have aborted. 
3541
3542 // We then partially drain the local queue and the global stack.
3543 // (Do we really need this?)
3544 drain_local_queue(true);
3545 drain_global_stack(true);
3546
3547 // Read the note on the claim_region() method about why it might
3548 // return NULL with potentially more regions available for
3549 // claiming and why we have to check out_of_regions() to determine
3550 // whether we're done or not.
3551 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3552 // We are going to try to claim a new region. We should have
3553 // given up on the previous one.
3554 // Separated the asserts so that we know which one fires.
3555 assert(_curr_region == NULL, "invariant");
3556 assert(_finger == NULL, "invariant");
3557 assert(_region_limit == NULL, "invariant");
3558 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3559 if (claimed_region != NULL) {
3560 // Yes, we managed to claim one
3561 setup_for_region(claimed_region);
3562 assert(_curr_region == claimed_region, "invariant");
3563 }
3564 // It is important to call the regular clock here. It might take
3565 // a while to claim a region if, for example, we hit a large
3566 // block of empty regions. So we need to call the regular clock
3567 // method once round the loop to make sure it's called
3568 // frequently enough.
3569 regular_clock_call();
3570 }
3571
3572 if (!has_aborted() && _curr_region == NULL) {
3573 assert(_cm->out_of_regions(),
3574 "at this point we should be out of regions");
3575 }
3576 } while ( _curr_region != NULL && !has_aborted());
3577
3578 if (!has_aborted()) {
3579 // We cannot check whether the global stack is empty, since other
3580 // tasks might be pushing objects to it concurrently.
3581 assert(_cm->out_of_regions(),
3582 "at this point we should be out of regions");
3583 // Try to reduce the number of available SATB buffers so that
3584 // remark has less work to do.
3585 drain_satb_buffers();
3586 }
3587
3588 // Since we've done everything else, we can now totally drain the
3589 // local queue and global stack.
3590 drain_local_queue(false);
3591 drain_global_stack(false);
3592
3593 // Attempt at work stealing from other tasks' queues.
3594 if (do_stealing && !has_aborted()) {
3595 // We have not aborted. This means that we have finished all that
3596 // we could. Let's try to do some stealing...
3597
3598 // We cannot check whether the global stack is empty, since other
3599 // tasks might be pushing objects to it concurrently.
3600 assert(_cm->out_of_regions() && _task_queue->size() == 0,
3601 "only way to reach here");
3602 while (!has_aborted()) {
3603 oop obj;
3604 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3605 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3606 "any stolen object should be marked");
3607 scan_object(obj);
3608
3609 // And since we're towards the end, let's totally drain the
3610 // local queue and global stack.
3611 drain_local_queue(false);
3612 drain_global_stack(false);
3613 } else {
3614 break;
3615 }
3616 }
3617 }
3618
3619 // If we are about to wrap up and go into termination, check if we
3620 // should raise the overflow flag.
3621 if (do_termination && !has_aborted()) {
3622 if (_cm->force_overflow()->should_force()) {
3623 _cm->set_has_overflown();
3624 regular_clock_call();
3625 }
3626 }
3627
3628 // We still haven't aborted. Now, let's try to get into the
3629 // termination protocol.
3630 if (do_termination && !has_aborted()) { 3631 // We cannot check whether the global stack is empty, since other 3632 // tasks might be concurrently pushing objects on it. 3633 // Separated the asserts so that we know which one fires. 3634 assert(_cm->out_of_regions(), "only way to reach here"); 3635 assert(_task_queue->size() == 0, "only way to reach here"); 3636 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 3637 3638 // The CMTask class also extends the TerminatorTerminator class, 3639 // hence its should_exit_termination() method will also decide 3640 // whether to exit the termination protocol or not. 3641 bool finished = (is_serial || 3642 _cm->terminator()->offer_termination(this)); 3643 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 3644 _termination_time_ms += 3645 termination_end_time_ms - _termination_start_time_ms; 3646 3647 if (finished) { 3648 // We're all done. 3649 3650 if (_worker_id == 0) { 3651 // let's allow task 0 to do this 3652 if (concurrent()) { 3653 assert(_cm->concurrent_marking_in_progress(), "invariant"); 3654 // we need to set this to false before the next 3655 // safepoint. This way we ensure that the marking phase 3656 // doesn't observe any more heap expansions. 3657 _cm->clear_concurrent_marking_in_progress(); 3658 } 3659 } 3660 3661 // We can now guarantee that the global stack is empty, since 3662 // all other tasks have finished. We separated the guarantees so 3663 // that, if a condition is false, we can immediately find out 3664 // which one. 3665 guarantee(_cm->out_of_regions(), "only way to reach here"); 3666 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 3667 guarantee(_task_queue->size() == 0, "only way to reach here"); 3668 guarantee(!_cm->has_overflown(), "only way to reach here"); 3669 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 3670 } else { 3671 // Apparently there's more work to do. Let's abort this task. It 3672 // will restart it and we can hopefully find more things to do. 3673 set_has_aborted(); 3674 } 3675 } 3676 3677 // Mainly for debugging purposes to make sure that a pointer to the 3678 // closure which was statically allocated in this frame doesn't 3679 // escape it by accident. 3680 set_cm_oop_closure(NULL); 3681 double end_time_ms = os::elapsedVTime() * 1000.0; 3682 double elapsed_time_ms = end_time_ms - _start_time_ms; 3683 // Update the step history. 3684 _step_times_ms.add(elapsed_time_ms); 3685 3686 if (has_aborted()) { 3687 // The task was aborted for some reason. 3688 if (_has_timed_out) { 3689 double diff_ms = elapsed_time_ms - _time_target_ms; 3690 // Keep statistics of how well we did with respect to hitting 3691 // our target only if we actually timed out (if we aborted for 3692 // other reasons, then the results might get skewed). 3693 _marking_step_diffs_ms.add(diff_ms); 3694 } 3695 3696 if (_cm->has_overflown()) { 3697 // This is the interesting one. We aborted because a global 3698 // overflow was raised. This means we have to restart the 3699 // marking phase and start iterating over regions. However, in 3700 // order to do this we have to make sure that all tasks stop 3701 // what they are doing and re-initialize in a safe manner. We 3702 // will achieve this with the use of two barrier sync points. 
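// The first barrier waits until every task has stopped doing marking work;
// the second is entered only after each task has cleared its local state, so
// that the restart begins from fully re-initialized data structures.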
3703 3704 if (!is_serial) { 3705 // We only need to enter the sync barrier if being called 3706 // from a parallel context 3707 _cm->enter_first_sync_barrier(_worker_id); 3708 3709 // When we exit this sync barrier we know that all tasks have 3710 // stopped doing marking work. So, it's now safe to 3711 // re-initialize our data structures. At the end of this method, 3712 // task 0 will clear the global data structures. 3713 } 3714 3715 // We clear the local state of this task... 3716 clear_region_fields(); 3717 3718 if (!is_serial) { 3719 // ...and enter the second barrier. 3720 _cm->enter_second_sync_barrier(_worker_id); 3721 } 3722 // At this point, if we're during the concurrent phase of 3723 // marking, everything has been re-initialized and we're 3724 // ready to restart. 3725 } 3726 } 3727 3728 _claimed = false; 3729 } 3730 3731 CMTask::CMTask(uint worker_id, 3732 ConcurrentMark* cm, 3733 size_t* marked_bytes, 3734 BitMap* card_bm, 3735 CMTaskQueue* task_queue, 3736 CMTaskQueueSet* task_queues) 3737 : _g1h(G1CollectedHeap::heap()), 3738 _worker_id(worker_id), _cm(cm), 3739 _claimed(false), 3740 _nextMarkBitMap(NULL), _hash_seed(17), 3741 _task_queue(task_queue), 3742 _task_queues(task_queues), 3743 _cm_oop_closure(NULL), 3744 _marked_bytes_array(marked_bytes), 3745 _card_bm(card_bm) { 3746 guarantee(task_queue != NULL, "invariant"); 3747 guarantee(task_queues != NULL, "invariant"); 3748 3749 _marking_step_diffs_ms.add(0.5); 3750 } 3751 3752 // These are formatting macros that are used below to ensure 3753 // consistent formatting. The *_H_* versions are used to format the 3754 // header for a particular value and they should be kept consistent 3755 // with the corresponding macro. Also note that most of the macros add 3756 // the necessary white space (as a prefix) which makes them a bit 3757 // easier to compose. 3758 3759 // All the output lines are prefixed with this string to be able to 3760 // identify them easily in a large log file. 3761 #define G1PPRL_LINE_PREFIX "###" 3762 3763 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 3764 #ifdef _LP64 3765 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 3766 #else // _LP64 3767 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 3768 #endif // _LP64 3769 3770 // For per-region info 3771 #define G1PPRL_TYPE_FORMAT " %-4s" 3772 #define G1PPRL_TYPE_H_FORMAT " %4s" 3773 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 3774 #define G1PPRL_BYTE_H_FORMAT " %9s" 3775 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 3776 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 3777 3778 // For summary info 3779 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 3780 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 3781 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 3782 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 3783 3784 G1PrintRegionLivenessInfoClosure:: 3785 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 3786 : _out(out), 3787 _total_used_bytes(0), _total_capacity_bytes(0), 3788 _total_prev_live_bytes(0), _total_next_live_bytes(0), 3789 _hum_used_bytes(0), _hum_capacity_bytes(0), 3790 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 3791 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 3792 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 3793 MemRegion g1_reserved = g1h->g1_reserved(); 3794 double now = os::elapsedTime(); 3795 3796 // Print the header of the output. 
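// Roughly (values and spacing illustrative only; the exact layout comes from
// the format macros above and the phase name is supplied by the caller):
//   ### PHASE Some-Phase @ 12.345
//   ### HEAP reserved: 0x00000000-0x80000000 region-size: 1048576
//   ### type address-range used prev-live next-live gc-eff remset code-roots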
3797 _out->cr();
3798 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3799 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
3800 G1PPRL_SUM_ADDR_FORMAT("reserved")
3801 G1PPRL_SUM_BYTE_FORMAT("region-size"),
3802 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3803 HeapRegion::GrainBytes);
3804 _out->print_cr(G1PPRL_LINE_PREFIX);
3805 _out->print_cr(G1PPRL_LINE_PREFIX
3806 G1PPRL_TYPE_H_FORMAT
3807 G1PPRL_ADDR_BASE_H_FORMAT
3808 G1PPRL_BYTE_H_FORMAT
3809 G1PPRL_BYTE_H_FORMAT
3810 G1PPRL_BYTE_H_FORMAT
3811 G1PPRL_DOUBLE_H_FORMAT
3812 G1PPRL_BYTE_H_FORMAT
3813 G1PPRL_BYTE_H_FORMAT,
3814 "type", "address-range",
3815 "used", "prev-live", "next-live", "gc-eff",
3816 "remset", "code-roots");
3817 _out->print_cr(G1PPRL_LINE_PREFIX
3818 G1PPRL_TYPE_H_FORMAT
3819 G1PPRL_ADDR_BASE_H_FORMAT
3820 G1PPRL_BYTE_H_FORMAT
3821 G1PPRL_BYTE_H_FORMAT
3822 G1PPRL_BYTE_H_FORMAT
3823 G1PPRL_DOUBLE_H_FORMAT
3824 G1PPRL_BYTE_H_FORMAT
3825 G1PPRL_BYTE_H_FORMAT,
3826 "", "",
3827 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3828 "(bytes)", "(bytes)");
3829 }
3830
3831 // It takes as a parameter a reference to one of the _hum_* fields,
3832 // deduces the corresponding value for a region in a humongous region
3833 // series (either the region size, or what's left if the _hum_* field
3834 // is < the region size), and updates the _hum_* field accordingly.
3835 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3836 size_t bytes = 0;
3837 // The > 0 check is to deal with the prev and next live bytes which
3838 // could be 0.
3839 if (*hum_bytes > 0) {
3840 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3841 *hum_bytes -= bytes;
3842 }
3843 return bytes;
3844 }
3845
3846 // It deduces the values for a region in a humongous region series
3847 // from the _hum_* fields and updates those accordingly. It assumes
3848 // that the _hum_* fields have already been set up from the "starts
3849 // humongous" region and that we visit the regions in address order.
3850 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3851 size_t* capacity_bytes,
3852 size_t* prev_live_bytes,
3853 size_t* next_live_bytes) {
3854 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3855 *used_bytes = get_hum_bytes(&_hum_used_bytes);
3856 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
3857 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3858 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3859 }
3860
3861 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3862 const char* type = r->get_type_str();
3863 HeapWord* bottom = r->bottom();
3864 HeapWord* end = r->end();
3865 size_t capacity_bytes = r->capacity();
3866 size_t used_bytes = r->used();
3867 size_t prev_live_bytes = r->live_bytes();
3868 size_t next_live_bytes = r->next_live_bytes();
3869 double gc_eff = r->gc_efficiency();
3870 size_t remset_bytes = r->rem_set()->mem_size();
3871 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3872
3873 if (r->is_starts_humongous()) {
3874 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3875 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3876 "they should have been zeroed after the last time we used them");
3877 // Set up the _hum_* fields.
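// The totals for the whole humongous series are reported by the "starts
// humongous" region; we stash them in the _hum_* fields and get_hum_bytes()
// then hands them back out one region's worth (GrainBytes) at a time as we
// visit each region of the series, so each printed row stays region-sized.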
3878 _hum_capacity_bytes = capacity_bytes; 3879 _hum_used_bytes = used_bytes; 3880 _hum_prev_live_bytes = prev_live_bytes; 3881 _hum_next_live_bytes = next_live_bytes; 3882 get_hum_bytes(&used_bytes, &capacity_bytes, 3883 &prev_live_bytes, &next_live_bytes); 3884 end = bottom + HeapRegion::GrainWords; 3885 } else if (r->is_continues_humongous()) { 3886 get_hum_bytes(&used_bytes, &capacity_bytes, 3887 &prev_live_bytes, &next_live_bytes); 3888 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 3889 } 3890 3891 _total_used_bytes += used_bytes; 3892 _total_capacity_bytes += capacity_bytes; 3893 _total_prev_live_bytes += prev_live_bytes; 3894 _total_next_live_bytes += next_live_bytes; 3895 _total_remset_bytes += remset_bytes; 3896 _total_strong_code_roots_bytes += strong_code_roots_bytes; 3897 3898 // Print a line for this particular region. 3899 _out->print_cr(G1PPRL_LINE_PREFIX 3900 G1PPRL_TYPE_FORMAT 3901 G1PPRL_ADDR_BASE_FORMAT 3902 G1PPRL_BYTE_FORMAT 3903 G1PPRL_BYTE_FORMAT 3904 G1PPRL_BYTE_FORMAT 3905 G1PPRL_DOUBLE_FORMAT 3906 G1PPRL_BYTE_FORMAT 3907 G1PPRL_BYTE_FORMAT, 3908 type, p2i(bottom), p2i(end), 3909 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 3910 remset_bytes, strong_code_roots_bytes); 3911 3912 return false; 3913 } 3914 3915 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 3916 // add static memory usages to remembered set sizes 3917 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 3918 // Print the footer of the output. 3919 _out->print_cr(G1PPRL_LINE_PREFIX); 3920 _out->print_cr(G1PPRL_LINE_PREFIX 3921 " SUMMARY" 3922 G1PPRL_SUM_MB_FORMAT("capacity") 3923 G1PPRL_SUM_MB_PERC_FORMAT("used") 3924 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3925 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3926 G1PPRL_SUM_MB_FORMAT("remset") 3927 G1PPRL_SUM_MB_FORMAT("code-roots"), 3928 bytes_to_mb(_total_capacity_bytes), 3929 bytes_to_mb(_total_used_bytes), 3930 perc(_total_used_bytes, _total_capacity_bytes), 3931 bytes_to_mb(_total_prev_live_bytes), 3932 perc(_total_prev_live_bytes, _total_capacity_bytes), 3933 bytes_to_mb(_total_next_live_bytes), 3934 perc(_total_next_live_bytes, _total_capacity_bytes), 3935 bytes_to_mb(_total_remset_bytes), 3936 bytes_to_mb(_total_strong_code_roots_bytes)); 3937 _out->cr(); 3938 }