1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1ErgoVerbose.hpp"
  35 #include "gc/g1/g1Log.hpp"
  36 #include "gc/g1/g1OopClosures.inline.hpp"
  37 #include "gc/g1/g1RemSet.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/heapRegion.inline.hpp"
  40 #include "gc/g1/heapRegionManager.inline.hpp"
  41 #include "gc/g1/heapRegionRemSet.hpp"
  42 #include "gc/g1/heapRegionSet.inline.hpp"
  43 #include "gc/g1/suspendibleThreadSet.hpp"
  44 #include "gc/shared/gcId.hpp"
  45 #include "gc/shared/gcTimer.hpp"
  46 #include "gc/shared/gcTrace.hpp"
  47 #include "gc/shared/gcTraceTime.hpp"
  48 #include "gc/shared/genOopClosures.inline.hpp"
  49 #include "gc/shared/referencePolicy.hpp"
  50 #include "gc/shared/strongRootsScope.hpp"
  51 #include "gc/shared/taskqueue.inline.hpp"
  52 #include "gc/shared/vmGCOperations.hpp"
  53 #include "memory/allocation.hpp"
  54 #include "memory/resourceArea.hpp"
  55 #include "oops/oop.inline.hpp"
  56 #include "runtime/atomic.inline.hpp"
  57 #include "runtime/handles.inline.hpp"
  58 #include "runtime/java.hpp"
  59 #include "runtime/prefetch.inline.hpp"
  60 #include "services/memTracker.hpp"
  61 
  62 // Concurrent marking bit map wrapper
  63 
  64 CMBitMapRO::CMBitMapRO(int shifter) :
  65   _bm(),
  66   _shifter(shifter) {
  67   _bmStartWord = 0;
  68   _bmWordSize = 0;
  69 }
  70 
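     // Returns the address of the first marked word at or after addr (rounded up
     // to a possible object boundary), or limit if no mark is found before it.
     // A NULL limit means the end of the area covered by the bitmap.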
  71 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  72                                                const HeapWord* limit) const {
  73   // First we must round addr *up* to a possible object boundary.
  74   addr = (HeapWord*)align_size_up((intptr_t)addr,
  75                                   HeapWordSize << _shifter);
  76   size_t addrOffset = heapWordToOffset(addr);
  77   if (limit == NULL) {
  78     limit = _bmStartWord + _bmWordSize;
  79   }
  80   size_t limitOffset = heapWordToOffset(limit);
  81   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  82   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  83   assert(nextAddr >= addr, "get_next_one postcondition");
  84   assert(nextAddr == limit || isMarked(nextAddr),
  85          "get_next_one postcondition");
  86   return nextAddr;
  87 }
  88 
  89 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  90                                                  const HeapWord* limit) const {
  91   size_t addrOffset = heapWordToOffset(addr);
  92   if (limit == NULL) {
  93     limit = _bmStartWord + _bmWordSize;
  94   }
  95   size_t limitOffset = heapWordToOffset(limit);
  96   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  97   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  98   assert(nextAddr >= addr, "get_next_zero postcondition");
  99   assert(nextAddr == limit || !isMarked(nextAddr),
 100          "get_next_zero postcondition");
 101   return nextAddr;
 102 }
 103 
 104 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 105   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 106   return (int) (diff >> _shifter);
 107 }
 108 
 109 #ifndef PRODUCT
 110 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 111   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 112   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 113          "size inconsistency");
 114   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 115          _bmWordSize  == heap_rs.word_size();
 116 }
 117 #endif
 118 
 119 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 120   _bm.print_on_error(st, prefix);
 121 }
 122 
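     // compute_size() returns the number of bytes needed for a bitmap covering
     // heap_size bytes of heap; mark_distance() is the number of heap bytes
     // covered by each bitmap byte (one bit per MinObjAlignmentInBytes bytes).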
 123 size_t CMBitMap::compute_size(size_t heap_size) {
 124   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 125 }
 126 
 127 size_t CMBitMap::mark_distance() {
 128   return MinObjAlignmentInBytes * BitsPerByte;
 129 }
 130 
 131 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 132   _bmStartWord = heap.start();
 133   _bmWordSize = heap.word_size();
 134 
 135   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 136   _bm.set_size(_bmWordSize >> _shifter);
 137 
 138   storage->set_mapping_changed_listener(&_listener);
 139 }
 140 
 141 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 142   if (zero_filled) {
 143     return;
 144   }
 145   // We need to clear the bitmap on commit, removing any existing information.
 146   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 147   _bm->clearRange(mr);
 148 }
 149 
 150 // Closure used for clearing the given mark bitmap.
 151 class ClearBitmapHRClosure : public HeapRegionClosure {
 152  private:
 153   ConcurrentMark* _cm;
 154   CMBitMap* _bitmap;
 155   bool _may_yield;      // The closure may yield during iteration; if it yields and marking has been aborted, the iteration is aborted too.
 156  public:
 157   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 158     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 159   }
 160 
 161   virtual bool doHeapRegion(HeapRegion* r) {
 162     size_t const chunk_size_in_words = M / HeapWordSize;
 163 
 164     HeapWord* cur = r->bottom();
 165     HeapWord* const end = r->end();
 166 
 167     while (cur < end) {
 168       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 169       _bitmap->clearRange(mr);
 170 
 171       cur += chunk_size_in_words;
 172 
 173       // Abort iteration if after yielding the marking has been aborted.
 174       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 175         return true;
 176       }
 177       // Repeat the asserts from before the start of the closure. We will do them
 178       // as asserts here to minimize their overhead on the product. However, we
 179       // will have them as guarantees at the beginning / end of the bitmap
 180       // clearing to get some checking in the product.
 181       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 182       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 183     }
 184 
 185     return false;
 186   }
 187 };
 188 
 189 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 190   ClearBitmapHRClosure* _cl;
 191   HeapRegionClaimer     _hrclaimer;
 192   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 193 
 194 public:
 195   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
 196       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 197 
 198   void work(uint worker_id) {
 199     SuspendibleThreadSetJoiner sts_join(_suspendible);
 200     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 201   }
 202 };
 203 
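     // Clears the whole bitmap by iterating over all heap regions with the
     // heap's work gang. This variant never yields.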
 204 void CMBitMap::clearAll() {
 205   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 206   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 207   uint n_workers = g1h->workers()->active_workers();
 208   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 209   g1h->workers()->run_task(&task);
 210   guarantee(cl.complete(), "Must have completed iteration.");
 211   return;
 212 }
 213 
 214 void CMBitMap::markRange(MemRegion mr) {
 215   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 216   assert(!mr.is_empty(), "unexpected empty region");
 217   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 218           ((HeapWord *) mr.end())),
 219          "markRange memory region end is not card aligned");
 220   // convert address range into offset range
 221   _bm.at_put_range(heapWordToOffset(mr.start()),
 222                    heapWordToOffset(mr.end()), true);
 223 }
 224 
 225 void CMBitMap::clearRange(MemRegion mr) {
 226   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 227   assert(!mr.is_empty(), "unexpected empty region");
 228   // convert address range into offset range
 229   _bm.at_put_range(heapWordToOffset(mr.start()),
 230                    heapWordToOffset(mr.end()), false);
 231 }
 232 
 233 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 234                                             HeapWord* end_addr) {
 235   HeapWord* start = getNextMarkedWordAddress(addr);
 236   start = MIN2(start, end_addr);
 237   HeapWord* end   = getNextUnmarkedWordAddress(start);
 238   end = MIN2(end, end_addr);
 239   assert(start <= end, "Consistency check");
 240   MemRegion mr(start, end);
 241   if (!mr.is_empty()) {
 242     clearRange(mr);
 243   }
 244   return mr;
 245 }
 246 
 247 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 248   _base(NULL), _cm(cm)
 249 {}
 250 
 251 bool CMMarkStack::allocate(size_t capacity) {
 252   // allocate a stack of the requisite depth
 253   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 254   if (!rs.is_reserved()) {
 255     warning("ConcurrentMark MarkStack allocation failure");
 256     return false;
 257   }
 258   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 259   if (!_virtual_space.initialize(rs, rs.size())) {
 260     warning("ConcurrentMark MarkStack backing store failure");
 261     // Release the virtual memory reserved for the marking stack
 262     rs.release();
 263     return false;
 264   }
 265   assert(_virtual_space.committed_size() == rs.size(),
 266          "Didn't reserve backing store for all of ConcurrentMark stack?");
 267   _base = (oop*) _virtual_space.low();
 268   setEmpty();
 269   _capacity = (jint) capacity;
 270   _saved_index = -1;
 271   _should_expand = false;
 272   return true;
 273 }
 274 
 275 void CMMarkStack::expand() {
 276   // Called during remark if we've overflown the marking stack during marking.
 277   assert(isEmpty(), "stack should have been emptied while handling overflow");
 278   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 279   // Clear expansion flag
 280   _should_expand = false;
 281   if (_capacity == (jint) MarkStackSizeMax) {
 282     if (PrintGCDetails && Verbose) {
 283       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 284     }
 285     return;
 286   }
 287   // Double capacity if possible
 288   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 289   // Do not give up existing stack until we have managed to
 290   // get the double capacity that we desired.
 291   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 292                                                            sizeof(oop)));
 293   if (rs.is_reserved()) {
 294     // Release the backing store associated with old stack
 295     _virtual_space.release();
 296     // Reinitialize virtual space for new stack
 297     if (!_virtual_space.initialize(rs, rs.size())) {
 298       fatal("Not enough swap for expanded marking stack capacity");
 299     }
 300     _base = (oop*)(_virtual_space.low());
 301     _index = 0;
 302     _capacity = new_capacity;
 303   } else {
 304     if (PrintGCDetails && Verbose) {
 305       // Failed to double capacity, continue;
 306       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 307                           SIZE_FORMAT "K to " SIZE_FORMAT "K",
 308                           _capacity / K, new_capacity / K);
 309     }
 310   }
 311 }
 312 
 313 void CMMarkStack::set_should_expand() {
 314   // If we're resetting the marking state because of a
 315   // marking stack overflow, record that we should, if
 316   // possible, expand the stack.
 317   _should_expand = _cm->has_overflown();
 318 }
 319 
 320 CMMarkStack::~CMMarkStack() {
 321   if (_base != NULL) {
 322     _base = NULL;
 323     _virtual_space.release();
 324   }
 325 }
 326 
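     // Pushes n entries onto the global mark stack under ParGCRareEvent_lock.
     // If the stack would exceed its capacity, sets _overflow and pushes nothing.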
 327 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 328   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 329   jint start = _index;
 330   jint next_index = start + n;
 331   if (next_index > _capacity) {
 332     _overflow = true;
 333     return;
 334   }
 335   // Otherwise.
 336   _index = next_index;
 337   for (int i = 0; i < n; i++) {
 338     int ind = start + i;
 339     assert(ind < _capacity, "By overflow test above.");
 340     _base[ind] = ptr_arr[i];
 341   }
 342 }
 343 
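     // Pops up to max entries into ptr_arr under ParGCRareEvent_lock. Stores the
     // number of entries popped in *n and returns whether anything was popped.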
 344 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 345   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 346   jint index = _index;
 347   if (index == 0) {
 348     *n = 0;
 349     return false;
 350   } else {
 351     int k = MIN2(max, index);
 352     jint  new_ind = index - k;
 353     for (int j = 0; j < k; j++) {
 354       ptr_arr[j] = _base[new_ind + j];
 355     }
 356     _index = new_ind;
 357     *n = k;
 358     return true;
 359   }
 360 }
 361 
 362 void CMMarkStack::note_start_of_gc() {
 363   assert(_saved_index == -1,
 364          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 365   _saved_index = _index;
 366 }
 367 
 368 void CMMarkStack::note_end_of_gc() {
 369   // This is intentionally a guarantee, instead of an assert. If we
 370   // accidentally add something to the mark stack during GC, it
 371   // will be a correctness issue so it's better if we crash. We'll
 372   // only check this once per GC anyway, so it won't be a performance
 373   // issue in any way.
 374   guarantee(_saved_index == _index,
 375             "saved index: %d index: %d", _saved_index, _index);
 376   _saved_index = -1;
 377 }
 378 
 379 CMRootRegions::CMRootRegions() :
 380   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 381   _should_abort(false),  _next_survivor(NULL) { }
 382 
 383 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 384   _young_list = g1h->young_list();
 385   _cm = cm;
 386 }
 387 
 388 void CMRootRegions::prepare_for_scan() {
 389   assert(!scan_in_progress(), "pre-condition");
 390 
 391   // Currently, only survivors can be root regions.
 392   assert(_next_survivor == NULL, "pre-condition");
 393   _next_survivor = _young_list->first_survivor_region();
 394   _scan_in_progress = (_next_survivor != NULL);
 395   _should_abort = false;
 396 }
 397 
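     // Claims the next root region to scan (currently always a survivor region).
     // Returns NULL when all root regions have been claimed or the scan should abort.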
 398 HeapRegion* CMRootRegions::claim_next() {
 399   if (_should_abort) {
 400     // If someone has set the should_abort flag, we return NULL to
 401     // force the caller to bail out of their loop.
 402     return NULL;
 403   }
 404 
 405   // Currently, only survivors can be root regions.
 406   HeapRegion* res = _next_survivor;
 407   if (res != NULL) {
 408     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 409     // Read it again in case it changed while we were waiting for the lock.
 410     res = _next_survivor;
 411     if (res != NULL) {
 412       if (res == _young_list->last_survivor_region()) {
 413         // We just claimed the last survivor so store NULL to indicate
 414         // that we're done.
 415         _next_survivor = NULL;
 416       } else {
 417         _next_survivor = res->get_next_young_region();
 418       }
 419     } else {
 420       // Someone else claimed the last survivor while we were trying
 421       // to take the lock so nothing else to do.
 422     }
 423   }
 424   assert(res == NULL || res->is_survivor(), "post-condition");
 425 
 426   return res;
 427 }
 428 
 429 void CMRootRegions::scan_finished() {
 430   assert(scan_in_progress(), "pre-condition");
 431 
 432   // Currently, only survivors can be root regions.
 433   if (!_should_abort) {
 434     assert(_next_survivor == NULL, "we should have claimed all survivors");
 435   }
 436   _next_survivor = NULL;
 437 
 438   {
 439     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 440     _scan_in_progress = false;
 441     RootRegionScan_lock->notify_all();
 442   }
 443 }
 444 
 445 bool CMRootRegions::wait_until_scan_finished() {
 446   if (!scan_in_progress()) return false;
 447 
 448   {
 449     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 450     while (scan_in_progress()) {
 451       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 452     }
 453   }
 454   return true;
 455 }
 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
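     // Returns the default number of concurrent marking threads: roughly one
     // quarter of the parallel GC threads, and at least one.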
 461 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 462   return MAX2((n_par_threads + 2) / 4, 1U);
 463 }
 464 
 465 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 466   _g1h(g1h),
 467   _markBitMap1(),
 468   _markBitMap2(),
 469   _parallel_marking_threads(0),
 470   _max_parallel_marking_threads(0),
 471   _sleep_factor(0.0),
 472   _marking_task_overhead(1.0),
 473   _cleanup_sleep_factor(0.0),
 474   _cleanup_task_overhead(1.0),
 475   _cleanup_list("Cleanup List"),
 476   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 477   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 478             CardTableModRefBS::card_shift,
 479             false /* in_resource_area*/),
 480 
 481   _prevMarkBitMap(&_markBitMap1),
 482   _nextMarkBitMap(&_markBitMap2),
 483 
 484   _markStack(this),
 485   // _finger set in set_non_marking_state
 486 
 487   _max_worker_id(ParallelGCThreads),
 488   // _active_tasks set in set_non_marking_state
 489   // _tasks set inside the constructor
 490   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 491   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 492 
 493   _has_overflown(false),
 494   _concurrent(false),
 495   _has_aborted(false),
 496   _restart_for_overflow(false),
 497   _concurrent_marking_in_progress(false),
 498 
 499   // _verbose_level set below
 500 
 501   _init_times(),
 502   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 503   _cleanup_times(),
 504   _total_counting_time(0.0),
 505   _total_rs_scrub_time(0.0),
 506 
 507   _parallel_workers(NULL),
 508 
 509   _count_card_bitmaps(NULL),
 510   _count_marked_bytes(NULL),
 511   _completed_initialization(false) {
 512 
 513   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 514   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 515 
 516   // Create & start a ConcurrentMark thread.
 517   _cmThread = new ConcurrentMarkThread(this);
 518   assert(cmThread() != NULL, "CM Thread should have been created");
 519   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 520   if (_cmThread->osthread() == NULL) {
 521       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 522   }
 523 
 524   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 525   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 526   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 527 
 528   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 529   satb_qs.set_buffer_size(G1SATBBufferSize);
 530 
 531   _root_regions.init(_g1h, this);
 532 
 533   if (ConcGCThreads > ParallelGCThreads) {
 534     warning("Can't have more ConcGCThreads (%u) "
 535             "than ParallelGCThreads (%u).",
 536             ConcGCThreads, ParallelGCThreads);
 537     return;
 538   }
 539   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 540     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 541     // if both are set
 542     _sleep_factor             = 0.0;
 543     _marking_task_overhead    = 1.0;
 544   } else if (G1MarkingOverheadPercent > 0) {
 545     // We will calculate the number of parallel marking threads based
 546     // on a target overhead with respect to the soft real-time goal
 547     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 548     double overall_cm_overhead =
 549       (double) MaxGCPauseMillis * marking_overhead /
 550       (double) GCPauseIntervalMillis;
 551     double cpu_ratio = 1.0 / (double) os::processor_count();
 552     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 553     double marking_task_overhead =
 554       overall_cm_overhead / marking_thread_num *
 555                                               (double) os::processor_count();
 556     double sleep_factor =
 557                        (1.0 - marking_task_overhead) / marking_task_overhead;
 558 
 559     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 560     _sleep_factor             = sleep_factor;
 561     _marking_task_overhead    = marking_task_overhead;
 562   } else {
 563     // Calculate the number of parallel marking threads by scaling
 564     // the number of parallel GC threads.
 565     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 566     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 567     _sleep_factor             = 0.0;
 568     _marking_task_overhead    = 1.0;
 569   }
 570 
 571   assert(ConcGCThreads > 0, "Should have been set");
 572   _parallel_marking_threads = ConcGCThreads;
 573   _max_parallel_marking_threads = _parallel_marking_threads;
 574 
 575   if (parallel_marking_threads() > 1) {
 576     _cleanup_task_overhead = 1.0;
 577   } else {
 578     _cleanup_task_overhead = marking_task_overhead();
 579   }
 580   _cleanup_sleep_factor =
 581                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 582 
 583 #if 0
 584   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 585   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 586   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 587   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 588   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 589 #endif
 590 
 591   _parallel_workers = new WorkGang("G1 Marker",
 592        _max_parallel_marking_threads, false, true);
 593   if (_parallel_workers == NULL) {
 594     vm_exit_during_initialization("Failed necessary allocation.");
 595   } else {
 596     _parallel_workers->initialize_workers();
 597   }
 598 
 599   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 600     size_t mark_stack_size =
 601       MIN2(MarkStackSizeMax,
 602           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 603     // Verify that the calculated value for MarkStackSize is in range.
 604     // It would be nice to use the private utility routine from Arguments.
 605     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 606       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 607               "must be between 1 and " SIZE_FORMAT,
 608               mark_stack_size, MarkStackSizeMax);
 609       return;
 610     }
 611     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 612   } else {
 613     // Verify MarkStackSize is in range.
 614     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 615       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 616         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 617           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 618                   "must be between 1 and " SIZE_FORMAT,
 619                   MarkStackSize, MarkStackSizeMax);
 620           return;
 621         }
 622       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 623         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 624           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 625                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 626                   MarkStackSize, MarkStackSizeMax);
 627           return;
 628         }
 629       }
 630     }
 631   }
 632 
 633   if (!_markStack.allocate(MarkStackSize)) {
 634     warning("Failed to allocate CM marking stack");
 635     return;
 636   }
 637 
 638   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 639   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 640 
 641   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 642   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 643 
 644   BitMap::idx_t card_bm_size = _card_bm.size();
 645 
 646   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 647   _active_tasks = _max_worker_id;
 648 
 649   uint max_regions = _g1h->max_regions();
 650   for (uint i = 0; i < _max_worker_id; ++i) {
 651     CMTaskQueue* task_queue = new CMTaskQueue();
 652     task_queue->initialize();
 653     _task_queues->register_queue(i, task_queue);
 654 
 655     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 656     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 657 
 658     _tasks[i] = new CMTask(i, this,
 659                            _count_marked_bytes[i],
 660                            &_count_card_bitmaps[i],
 661                            task_queue, _task_queues);
 662 
 663     _accum_task_vtime[i] = 0.0;
 664   }
 665 
 666   // Calculate the card number for the bottom of the heap. Used
 667   // in biasing indexes into the accounting card bitmaps.
 668   _heap_bottom_card_num =
 669     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 670                                 CardTableModRefBS::card_shift);
 671 
 672   // Clear all the liveness counting data
 673   clear_all_count_data();
 674 
 675   // so that the call below can read a sensible value
 676   _heap_start = g1h->reserved_region().start();
 677   set_non_marking_state();
 678   _completed_initialization = true;
 679 }
 680 
 681 void ConcurrentMark::reset() {
 682   // Starting values for these two. This should be called in a STW
 683   // phase.
 684   MemRegion reserved = _g1h->g1_reserved();
 685   _heap_start = reserved.start();
 686   _heap_end   = reserved.end();
 687 
 688   // Separated the asserts so that we know which one fires.
 689   assert(_heap_start != NULL, "heap bounds should look ok");
 690   assert(_heap_end != NULL, "heap bounds should look ok");
 691   assert(_heap_start < _heap_end, "heap bounds should look ok");
 692 
 693   // Reset all the marking data structures and any necessary flags
 694   reset_marking_state();
 695 
 696   // We do reset all of them, since different phases will use a
 697   // different number of active threads. So, it's easiest to have all
 698   // of them ready.
 699   for (uint i = 0; i < _max_worker_id; ++i) {
 700     _tasks[i]->reset(_nextMarkBitMap);
 701   }
 702 
 703   // we need this to make sure that the flag is on during the evac
 704   // pause with initial mark piggy-backed
 705   set_concurrent_marking_in_progress();
 706 }
 707 
 708 
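     // Resets the global marking state between phases: empties the global mark
     // stack (recording whether it should be expanded) and all task queues,
     // resets the global finger, and optionally clears the overflow flag.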
 709 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 710   _markStack.set_should_expand();
 711   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 712   if (clear_overflow) {
 713     clear_has_overflown();
 714   } else {
 715     assert(has_overflown(), "pre-condition");
 716   }
 717   _finger = _heap_start;
 718 
 719   for (uint i = 0; i < _max_worker_id; ++i) {
 720     CMTaskQueue* queue = _task_queues->queue(i);
 721     queue->set_empty();
 722   }
 723 }
 724 
 725 void ConcurrentMark::set_concurrency(uint active_tasks) {
 726   assert(active_tasks <= _max_worker_id, "we should not have more");
 727 
 728   _active_tasks = active_tasks;
 729   // Need to update the three data structures below according to the
 730   // number of active threads for this phase.
 731   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 732   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 733   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 734 }
 735 
 736 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 737   set_concurrency(active_tasks);
 738 
 739   _concurrent = concurrent;
 740   // We propagate this to all tasks, not just the active ones.
 741   for (uint i = 0; i < _max_worker_id; ++i)
 742     _tasks[i]->set_concurrent(concurrent);
 743 
 744   if (concurrent) {
 745     set_concurrent_marking_in_progress();
 746   } else {
 747     // We currently assume that the concurrent flag has been set to
 748     // false before we start remark. At this point we should also be
 749     // in a STW phase.
 750     assert(!concurrent_marking_in_progress(), "invariant");
 751     assert(out_of_regions(),
 752            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 753            p2i(_finger), p2i(_heap_end));
 754   }
 755 }
 756 
 757 void ConcurrentMark::set_non_marking_state() {
 758   // We set the global marking state to some default values when we're
 759   // not doing marking.
 760   reset_marking_state();
 761   _active_tasks = 0;
 762   clear_concurrent_marking_in_progress();
 763 }
 764 
 765 ConcurrentMark::~ConcurrentMark() {
 766   // The ConcurrentMark instance is never freed.
 767   ShouldNotReachHere();
 768 }
 769 
 770 void ConcurrentMark::clearNextBitmap() {
 771   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 772 
 773   // Make sure that the concurrent mark thread still appears to be in
 774   // the current cycle.
 775   guarantee(cmThread()->during_cycle(), "invariant");
 776 
 777   // We are finishing up the current cycle by clearing the next
 778   // marking bitmap and getting it ready for the next cycle. During
 779   // this time no other cycle can start. So, let's make sure that this
 780   // is the case.
 781   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 782 
 783   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 784   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 785   _parallel_workers->run_task(&task);
 786 
 787   // Clear the liveness counting data. If the marking has been aborted, the abort()
 788   // call already did that.
 789   if (cl.complete()) {
 790     clear_all_count_data();
 791   }
 792 
 793   // Repeat the asserts from above.
 794   guarantee(cmThread()->during_cycle(), "invariant");
 795   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 796 }
 797 
 798 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 799   CMBitMap* _bitmap;
 800   bool _error;
 801  public:
 802   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 803   }
 804 
 805   virtual bool doHeapRegion(HeapRegion* r) {
 806     // This closure can be called concurrently with the mutator, so we must make sure
 807     // that the result of the getNextMarkedWordAddress() call is compared to the
 808     // value passed to it as limit to detect any found bits.
 809     HeapWord* end = r->end();
 810     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 811   }
 812 };
 813 
 814 bool ConcurrentMark::nextMarkBitmapIsClear() {
 815   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 816   _g1h->heap_region_iterate(&cl);
 817   return cl.complete();
 818 }
 819 
 820 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 821 public:
 822   bool doHeapRegion(HeapRegion* r) {
 823     r->note_start_of_marking();
 824     return false;
 825   }
 826 };
 827 
 828 void ConcurrentMark::checkpointRootsInitialPre() {
 829   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 830   G1CollectorPolicy* g1p = g1h->g1_policy();
 831 
 832   _has_aborted = false;
 833 
 834   // Initialize marking structures. This has to be done in a STW phase.
 835   reset();
 836 
 837   // For each region note start of marking.
 838   NoteStartOfMarkHRClosure startcl;
 839   g1h->heap_region_iterate(&startcl);
 840 }
 841 
 842 
 843 void ConcurrentMark::checkpointRootsInitialPost() {
 844   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 845 
 846   // If we force an overflow during remark, the remark operation will
 847   // actually abort and we'll restart concurrent marking. If we always
 848   // force an overflow during remark we'll never actually complete the
 849   // marking phase. So, we initialize this here, at the start of the
 850   // cycle, so that the remaining overflow count will decrease at
 851   // every remark and we'll eventually not need to cause one.
 852   force_overflow_stw()->init();
 853 
 854   // Start Concurrent Marking weak-reference discovery.
 855   ReferenceProcessor* rp = g1h->ref_processor_cm();
 856   // enable ("weak") refs discovery
 857   rp->enable_discovery();
 858   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 859 
 860   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 861   // This is the start of the marking cycle; we expect all
 862   // threads to have SATB queues with active set to false.
 863   satb_mq_set.set_active_all_threads(true, /* new active value */
 864                                      false /* expected_active */);
 865 
 866   _root_regions.prepare_for_scan();
 867 
 868   // update_g1_committed() will be called at the end of an evac pause
 869   // when marking is on. So, it's also called at the end of the
 870   // initial-mark pause to update the heap end, if the heap expands
 871   // during it. No need to call it here.
 872 }
 873 
 874 /*
 875  * Notice that in the next two methods, we actually leave the STS
 876  * during the barrier sync and join it immediately afterwards. If we
 877  * do not do this, the following deadlock can occur: one thread could
 878  * be in the barrier sync code, waiting for the other thread to also
 879  * sync up, whereas another one could be trying to yield, while also
 880  * waiting for the other threads to sync up too.
 881  *
 882  * Note, however, that this code is also used during remark and in
 883  * this case we should not attempt to leave / enter the STS, otherwise
 884  * we'll either hit an assert (debug / fastdebug) or deadlock
 885  * (product). So we should only leave / enter the STS if we are
 886  * operating concurrently.
 887  *
 888  * Because the thread that does the sync barrier has left the STS, it
 889  * is possible for it to be suspended while a Full GC or an evacuation
 890  * pause occurs. This is actually safe, since entering the sync
 891  * barrier is one of the last things do_marking_step() does, and it
 892  * doesn't manipulate any data structures afterwards.
 893  */
 894 
 895 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 896   bool barrier_aborted;
 897   {
 898     SuspendibleThreadSetLeaver sts_leave(concurrent());
 899     barrier_aborted = !_first_overflow_barrier_sync.enter();
 900   }
 901 
 902   // at this point everyone should have synced up and not be doing any
 903   // more work
 904 
 905   if (barrier_aborted) {
 906     // If the barrier aborted we ignore the overflow condition and
 907     // just abort the whole marking phase as quickly as possible.
 908     return;
 909   }
 910 
 911   // If we're executing the concurrent phase of marking, reset the marking
 912   // state; otherwise the marking state is reset after reference processing,
 913   // during the remark pause.
 914   // If we reset here as a result of an overflow during the remark we will
 915   // see assertion failures from any subsequent set_concurrency_and_phase()
 916   // calls.
 917   if (concurrent()) {
 918     // let the task associated with worker 0 do this
 919     if (worker_id == 0) {
 920       // task 0 is responsible for clearing the global data structures
 921       // We should be here because of an overflow. During STW we should
 922       // not clear the overflow flag since we rely on it being true when
 923       // we exit this method to abort the pause and restart concurrent
 924       // marking.
 925       reset_marking_state(true /* clear_overflow */);
 926       force_overflow()->update();
 927 
 928       if (G1Log::fine()) {
 929         gclog_or_tty->gclog_stamp();
 930         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 931       }
 932     }
 933   }
 934 
 935   // after this, each task should reset its own data structures and
 936   // then go into the second barrier
 937 }
 938 
 939 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 940   SuspendibleThreadSetLeaver sts_leave(concurrent());
 941   _second_overflow_barrier_sync.enter();
 942 
 943   // at this point everything should be re-initialized and ready to go
 944 }
 945 
 946 #ifndef PRODUCT
 947 void ForceOverflowSettings::init() {
 948   _num_remaining = G1ConcMarkForceOverflow;
 949   _force = false;
 950   update();
 951 }
 952 
 953 void ForceOverflowSettings::update() {
 954   if (_num_remaining > 0) {
 955     _num_remaining -= 1;
 956     _force = true;
 957   } else {
 958     _force = false;
 959   }
 960 }
 961 
 962 bool ForceOverflowSettings::should_force() {
 963   if (_force) {
 964     _force = false;
 965     return true;
 966   } else {
 967     return false;
 968   }
 969 }
 970 #endif // !PRODUCT
 971 
 972 class CMConcurrentMarkingTask: public AbstractGangTask {
 973 private:
 974   ConcurrentMark*       _cm;
 975   ConcurrentMarkThread* _cmt;
 976 
 977 public:
 978   void work(uint worker_id) {
 979     assert(Thread::current()->is_ConcurrentGC_thread(),
 980            "this should only be done by a conc GC thread");
 981     ResourceMark rm;
 982 
 983     double start_vtime = os::elapsedVTime();
 984 
 985     {
 986       SuspendibleThreadSetJoiner sts_join;
 987 
 988       assert(worker_id < _cm->active_tasks(), "invariant");
 989       CMTask* the_task = _cm->task(worker_id);
 990       the_task->record_start_time();
 991       if (!_cm->has_aborted()) {
 992         do {
 993           double start_vtime_sec = os::elapsedVTime();
 994           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 995 
 996           the_task->do_marking_step(mark_step_duration_ms,
 997                                     true  /* do_termination */,
 998                                     false /* is_serial*/);
 999 
1000           double end_vtime_sec = os::elapsedVTime();
1001           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1002           _cm->clear_has_overflown();
1003 
1004           _cm->do_yield_check(worker_id);
1005 
1006           jlong sleep_time_ms;
1007           if (!_cm->has_aborted() && the_task->has_aborted()) {
1008             sleep_time_ms =
1009               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1010             {
1011               SuspendibleThreadSetLeaver sts_leave;
1012               os::sleep(Thread::current(), sleep_time_ms, false);
1013             }
1014           }
1015         } while (!_cm->has_aborted() && the_task->has_aborted());
1016       }
1017       the_task->record_end_time();
1018       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1019     }
1020 
1021     double end_vtime = os::elapsedVTime();
1022     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1023   }
1024 
1025   CMConcurrentMarkingTask(ConcurrentMark* cm,
1026                           ConcurrentMarkThread* cmt) :
1027       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1028 
1029   ~CMConcurrentMarkingTask() { }
1030 };
1031 
1032 // Calculates the number of active workers for a concurrent
1033 // phase.
1034 uint ConcurrentMark::calc_parallel_marking_threads() {
1035   uint n_conc_workers = 0;
1036   if (!UseDynamicNumberOfGCThreads ||
1037       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1038        !ForceDynamicNumberOfGCThreads)) {
1039     n_conc_workers = max_parallel_marking_threads();
1040   } else {
1041     n_conc_workers =
1042       AdaptiveSizePolicy::calc_default_active_workers(
1043                                    max_parallel_marking_threads(),
1044                                    1, /* Minimum workers */
1045                                    parallel_marking_threads(),
1046                                    Threads::number_of_non_daemon_threads());
1047     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1048     // that scaling has already gone into "_max_parallel_marking_threads".
1049   }
1050   assert(n_conc_workers > 0, "Always need at least 1");
1051   return n_conc_workers;
1052 }
1053 
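     // Scans a single root region by applying G1RootRegionScanClosure to every
     // object between the region's bottom() and top().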
1054 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1055   // Currently, only survivors can be root regions.
1056   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1057   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1058 
1059   const uintx interval = PrefetchScanIntervalInBytes;
1060   HeapWord* curr = hr->bottom();
1061   const HeapWord* end = hr->top();
1062   while (curr < end) {
1063     Prefetch::read(curr, interval);
1064     oop obj = oop(curr);
1065     int size = obj->oop_iterate_size(&cl);
1066     assert(size == obj->size(), "sanity");
1067     curr += size;
1068   }
1069 }
1070 
1071 class CMRootRegionScanTask : public AbstractGangTask {
1072 private:
1073   ConcurrentMark* _cm;
1074 
1075 public:
1076   CMRootRegionScanTask(ConcurrentMark* cm) :
1077     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1078 
1079   void work(uint worker_id) {
1080     assert(Thread::current()->is_ConcurrentGC_thread(),
1081            "this should only be done by a conc GC thread");
1082 
1083     CMRootRegions* root_regions = _cm->root_regions();
1084     HeapRegion* hr = root_regions->claim_next();
1085     while (hr != NULL) {
1086       _cm->scanRootRegion(hr, worker_id);
1087       hr = root_regions->claim_next();
1088     }
1089   }
1090 };
1091 
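     // Scans all root regions (the survivor regions recorded during the
     // initial-mark pause) in parallel before concurrent marking proper starts.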
1092 void ConcurrentMark::scanRootRegions() {
1093   double scan_start = os::elapsedTime();
1094 
1095   // Start of concurrent marking.
1096   ClassLoaderDataGraph::clear_claimed_marks();
1097 
1098   // scan_in_progress() will have been set to true only if there was
1099   // at least one root region to scan. So, if it's false, we
1100   // should not attempt to do any further work.
1101   if (root_regions()->scan_in_progress()) {
1102     if (G1Log::fine()) {
1103       gclog_or_tty->gclog_stamp();
1104       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
1105     }
1106 
1107     _parallel_marking_threads = calc_parallel_marking_threads();
1108     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1109            "Maximum number of marking threads exceeded");
1110     uint active_workers = MAX2(1U, parallel_marking_threads());
1111 
1112     CMRootRegionScanTask task(this);
1113     _parallel_workers->set_active_workers(active_workers);
1114     _parallel_workers->run_task(&task);
1115 
1116     if (G1Log::fine()) {
1117       gclog_or_tty->gclog_stamp();
1118       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
1119     }
1120 
1121     // It's possible that has_aborted() is true here without actually
1122     // aborting the survivor scan earlier. This is OK as it's
1123     // mainly used for sanity checking.
1124     root_regions()->scan_finished();
1125   }
1126 }
1127 
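     // Runs the main concurrent marking phase using the calculated number of
     // parallel marking threads.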
1128 void ConcurrentMark::markFromRoots() {
1129   // we might be tempted to assert that:
1130   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1131   //        "inconsistent argument?");
1132   // However that wouldn't be right, because it's possible that
1133   // a safepoint is indeed in progress as a younger generation
1134   // stop-the-world GC happens even as we mark in this generation.
1135 
1136   _restart_for_overflow = false;
1137   force_overflow_conc()->init();
1138 
1139   // _g1h has _n_par_threads
1140   _parallel_marking_threads = calc_parallel_marking_threads();
1141   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1142     "Maximum number of marking threads exceeded");
1143 
1144   uint active_workers = MAX2(1U, parallel_marking_threads());
1145   assert(active_workers > 0, "Should have been set");
1146 
1147   // Parallel task terminator is set in "set_concurrency_and_phase()"
1148   set_concurrency_and_phase(active_workers, true /* concurrent */);
1149 
1150   CMConcurrentMarkingTask markingTask(this, cmThread());
1151   _parallel_workers->set_active_workers(active_workers);
1152   _parallel_workers->run_task(&markingTask);
1153   print_stats();
1154 }
1155 
1156 // Helper class to get rid of some boilerplate code.
1157 class G1CMTraceTime : public StackObj {
1158   GCTraceTimeImpl _gc_trace_time;
1159   static bool doit_and_prepend(bool doit) {
1160     if (doit) {
1161       gclog_or_tty->put(' ');
1162     }
1163     return doit;
1164   }
1165 
1166  public:
1167   G1CMTraceTime(const char* title, bool doit)
1168     : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
1169   }
1170 };
1171 
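     // The remark pause: finishes marking at a safepoint, processes weak
     // references, and either completes the marking cycle or schedules a restart
     // if the global mark stack overflowed.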
1172 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1173   // world is stopped at this checkpoint
1174   assert(SafepointSynchronize::is_at_safepoint(),
1175          "world should be stopped");
1176 
1177   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1178 
1179   // If a full collection has happened, we shouldn't do this.
1180   if (has_aborted()) {
1181     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1182     return;
1183   }
1184 
1185   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1186 
1187   if (VerifyDuringGC) {
1188     HandleMark hm;  // handle scope
1189     g1h->prepare_for_verify();
1190     Universe::verify(VerifyOption_G1UsePrevMarking,
1191                      " VerifyDuringGC:(before)");
1192   }
1193   g1h->check_bitmaps("Remark Start");
1194 
1195   G1CollectorPolicy* g1p = g1h->g1_policy();
1196   g1p->record_concurrent_mark_remark_start();
1197 
1198   double start = os::elapsedTime();
1199 
1200   checkpointRootsFinalWork();
1201 
1202   double mark_work_end = os::elapsedTime();
1203 
1204   weakRefsWork(clear_all_soft_refs);
1205 
1206   if (has_overflown()) {
1207     // Oops.  We overflowed.  Restart concurrent marking.
1208     _restart_for_overflow = true;
1209     if (G1TraceMarkStackOverflow) {
1210       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1211     }
1212 
1213     // Verify the heap w.r.t. the previous marking bitmap.
1214     if (VerifyDuringGC) {
1215       HandleMark hm;  // handle scope
1216       g1h->prepare_for_verify();
1217       Universe::verify(VerifyOption_G1UsePrevMarking,
1218                        " VerifyDuringGC:(overflow)");
1219     }
1220 
1221     // Clear the marking state because we will be restarting
1222     // marking due to overflowing the global mark stack.
1223     reset_marking_state();
1224   } else {
1225     {
1226       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1227 
1228       // Aggregate the per-task counting data that we have accumulated
1229       // while marking.
1230       aggregate_count_data();
1231     }
1232 
1233     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1234     // We're done with marking.
1235     // This is the end of the marking cycle; we expect all
1236     // threads to have SATB queues with active set to true.
1237     satb_mq_set.set_active_all_threads(false, /* new active value */
1238                                        true /* expected_active */);
1239 
1240     if (VerifyDuringGC) {
1241       HandleMark hm;  // handle scope
1242       g1h->prepare_for_verify();
1243       Universe::verify(VerifyOption_G1UseNextMarking,
1244                        " VerifyDuringGC:(after)");
1245     }
1246     g1h->check_bitmaps("Remark End");
1247     assert(!restart_for_overflow(), "sanity");
1248     // Completely reset the marking state since marking completed
1249     set_non_marking_state();
1250   }
1251 
1252   // Expand the marking stack, if we have to and if we can.
1253   if (_markStack.should_expand()) {
1254     _markStack.expand();
1255   }
1256 
1257   // Statistics
1258   double now = os::elapsedTime();
1259   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1260   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1261   _remark_times.add((now - start) * 1000.0);
1262 
1263   g1p->record_concurrent_mark_remark_end();
1264 
1265   G1CMIsAliveClosure is_alive(g1h);
1266   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1267 }
1268 
1269 // Base class of the closures that finalize and verify the
1270 // liveness counting data.
1271 class CMCountDataClosureBase: public HeapRegionClosure {
1272 protected:
1273   G1CollectedHeap* _g1h;
1274   ConcurrentMark* _cm;
1275   CardTableModRefBS* _ct_bs;
1276 
1277   BitMap* _region_bm;
1278   BitMap* _card_bm;
1279 
1280   // Takes a region that's not empty (i.e., it has at least one
1281   // live object in it) and sets its corresponding bit on the region
1282   // bitmap to 1.
1283   void set_bit_for_region(HeapRegion* hr) {
1284     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1285     _region_bm->par_at_put(index, true);
1286   }
1287 
1288 public:
1289   CMCountDataClosureBase(G1CollectedHeap* g1h,
1290                          BitMap* region_bm, BitMap* card_bm):
1291     _g1h(g1h), _cm(g1h->concurrent_mark()),
1292     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1293     _region_bm(region_bm), _card_bm(card_bm) { }
1294 };
1295 
1296 // Closure that calculates the # live objects per region. Used
1297 // for verification purposes during the cleanup pause.
1298 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1299   CMBitMapRO* _bm;
1300   size_t _region_marked_bytes;
1301 
1302 public:
1303   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1304                          BitMap* region_bm, BitMap* card_bm) :
1305     CMCountDataClosureBase(g1h, region_bm, card_bm),
1306     _bm(bm), _region_marked_bytes(0) { }
1307 
1308   bool doHeapRegion(HeapRegion* hr) {
1309     HeapWord* ntams = hr->next_top_at_mark_start();
1310     HeapWord* start = hr->bottom();
1311 
1312     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1313            "Preconditions not met - "
1314            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1315            p2i(start), p2i(ntams), p2i(hr->end()));
1316 
1317     // Find the first marked object at or after "start".
1318     start = _bm->getNextMarkedWordAddress(start, ntams);
1319 
1320     size_t marked_bytes = 0;
1321 
1322     while (start < ntams) {
1323       oop obj = oop(start);
1324       int obj_sz = obj->size();
1325       HeapWord* obj_end = start + obj_sz;
1326 
1327       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1328       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1329 
1330       // Note: if we're looking at the last region in heap - obj_end
1331       // could be actually just beyond the end of the heap; end_idx
1332       // will then correspond to a (non-existent) card that is also
1333       // just beyond the heap.
1334       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1335         // end of object is not card aligned - increment to cover
1336         // all the cards spanned by the object
1337         end_idx += 1;
1338       }
1339 
1340       // Set the bits in the card BM for the cards spanned by this object.
1341       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1342 
1343       // Add the size of this object to the number of marked bytes.
1344       marked_bytes += (size_t)obj_sz * HeapWordSize;
1345 
1346       if (obj_end > hr->end()) {
1347         break;
1348       }
1349       // Find the next marked object after this one.
1350       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1351     }
1352 
1353     // Mark the allocated-since-marking portion...
1354     HeapWord* top = hr->top();
1355     if (ntams < top) {
1356       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1357       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1358 
1359       // Note: if we're looking at the last region in heap - top
1360       // could be actually just beyond the end of the heap; end_idx
1361       // will then correspond to a (non-existent) card that is also
1362       // just beyond the heap.
1363       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1364         // end of object is not card aligned - increment to cover
1365         // all the cards spanned by the object
1366         end_idx += 1;
1367       }
1368       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1369 
1370       // This definitely means the region has live objects.
1371       set_bit_for_region(hr);
1372     }
1373 
1374     // Update the live region bitmap.
1375     if (marked_bytes > 0) {
1376       set_bit_for_region(hr);
1377     }
1378 
1379     // Set the marked bytes for the current region so that
1380     // it can be queried by a calling verification routine
1381     _region_marked_bytes = marked_bytes;
1382 
1383     return false;
1384   }
1385 
1386   size_t region_marked_bytes() const { return _region_marked_bytes; }
1387 };
1388 
1389 // Heap region closure used for verifying the counting data
1390 // that was accumulated concurrently and aggregated during
1391 // the remark pause. This closure is applied to the heap
1392 // regions during the STW cleanup pause.
1393 
1394 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1395   G1CollectedHeap* _g1h;
1396   ConcurrentMark* _cm;
1397   CalcLiveObjectsClosure _calc_cl;
1398   BitMap* _region_bm;   // Region BM to be verified
1399   BitMap* _card_bm;     // Card BM to be verified
1400 
1401   BitMap* _exp_region_bm; // Expected Region BM values
1402   BitMap* _exp_card_bm;   // Expected card BM values
1403 
1404   int _failures;
1405 
1406 public:
1407   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1408                                 BitMap* region_bm,
1409                                 BitMap* card_bm,
1410                                 BitMap* exp_region_bm,
1411                                 BitMap* exp_card_bm) :
1412     _g1h(g1h), _cm(g1h->concurrent_mark()),
1413     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1414     _region_bm(region_bm), _card_bm(card_bm),
1415     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1416     _failures(0) { }
1417 
1418   int failures() const { return _failures; }
1419 
1420   bool doHeapRegion(HeapRegion* hr) {
1421     int failures = 0;
1422 
1423     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1424     // this region and set the corresponding bits in the expected region
1425     // and card bitmaps.
1426     bool res = _calc_cl.doHeapRegion(hr);
1427     assert(res == false, "should be continuing");
1428 
1429     // Verify the marked bytes for this region.
1430     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1431     size_t act_marked_bytes = hr->next_marked_bytes();
1432 
1433     // We're not OK if expected marked bytes > actual marked bytes. It means
1434     // we have missed accounting for some objects during the actual marking.
1435     if (exp_marked_bytes > act_marked_bytes) {
1436       failures += 1;
1437     }
1438 
1439     // Verify the bit for this region in the actual region bitmap
1440     // against the expected (just calculated) one.
1441     // We're not OK if the bit in the calculated expected region
1442     // bitmap is set and the bit in the actual region bitmap is not.
1443     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1444 
1445     bool expected = _exp_region_bm->at(index);
1446     bool actual = _region_bm->at(index);
1447     if (expected && !actual) {
1448       failures += 1;
1449     }
1450 
1451     // Verify that the card bit maps for the cards spanned by the current
1452     // region match. We have an error if we have a set bit in the expected
1453     // bit map and the corresponding bit in the actual bitmap is not set.
1454 
1455     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1456     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1457 
1458     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1459       expected = _exp_card_bm->at(i);
1460       actual = _card_bm->at(i);
1461 
1462       if (expected && !actual) {
1463         failures += 1;
1464       }
1465     }
1466 
1467     _failures += failures;
1468 
1469     // We could stop iteration over the heap when we
1470     // find the first violating region by returning true.
1471     return false;
1472   }
1473 };
1474 
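     // Parallel task used, when VerifyDuringGC is set, to verify the liveness
     // counting data: each worker applies VerifyLiveObjectDataHRClosure to a
     // subset of the heap regions, and ConcurrentMark::cleanup() checks that
     // the accumulated failure count is zero.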
1475 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1476 protected:
1477   G1CollectedHeap* _g1h;
1478   ConcurrentMark* _cm;
1479   BitMap* _actual_region_bm;
1480   BitMap* _actual_card_bm;
1481 
1482   uint    _n_workers;
1483 
1484   BitMap* _expected_region_bm;
1485   BitMap* _expected_card_bm;
1486 
1487   int  _failures;
1488 
1489   HeapRegionClaimer _hrclaimer;
1490 
1491 public:
1492   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1493                             BitMap* region_bm, BitMap* card_bm,
1494                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1495     : AbstractGangTask("G1 verify final counting"),
1496       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1497       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1498       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1499       _failures(0),
1500       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1501     assert(VerifyDuringGC, "don't call this otherwise");
1502     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1503     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1504   }
1505 
1506   void work(uint worker_id) {
1507     assert(worker_id < _n_workers, "invariant");
1508 
1509     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1510                                             _actual_region_bm, _actual_card_bm,
1511                                             _expected_region_bm,
1512                                             _expected_card_bm);
1513 
1514     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1515 
1516     Atomic::add(verify_cl.failures(), &_failures);
1517   }
1518 
1519   int failures() const { return _failures; }
1520 };
1521 
1522 // Closure that finalizes the liveness counting data.
1523 // Used during the cleanup pause.
1524 // Sets the bits corresponding to the interval [NTAMS, top)
1525 // (which contains the implicitly live objects) in the
1526 // card liveness bitmap. Also sets, in the region liveness
1527 // bitmap, the bit for each region that contains live data.
1528 
1529 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1530  public:
1531   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1532                               BitMap* region_bm,
1533                               BitMap* card_bm) :
1534     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1535 
1536   bool doHeapRegion(HeapRegion* hr) {
1537     HeapWord* ntams = hr->next_top_at_mark_start();
1538     HeapWord* top   = hr->top();
1539 
1540     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1541 
1542     // Mark the allocated-since-marking portion...
1543     if (ntams < top) {
1544       // This definitely means the region has live objects.
1545       set_bit_for_region(hr);
1546 
1547       // Now set the bits in the card bitmap for [ntams, top)
1548       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1549       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1550 
1551       // Note: if we're looking at the last region in the heap, top
1552       // could actually be just beyond the end of the heap; end_idx
1553       // will then correspond to a (non-existent) card that is also
1554       // just beyond the heap.
1555       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1556         // end of the range is not card aligned - increment to cover
1557         // all the cards spanned by the range [ntams, top)
1558         end_idx += 1;
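             // For example, if top lies in the middle of a card then end_idx
             // computed above is the index of that partially covered card;
             // the increment makes that card part of the half-open range
             // [start_idx, end_idx) that is set below.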
1559       }
1560 
1561       assert(end_idx <= _card_bm->size(),
1562              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1563              end_idx, _card_bm->size());
1564       assert(start_idx < _card_bm->size(),
1565              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1566              start_idx, _card_bm->size());
1567 
1568       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1569     }
1570 
1571     // Set the bit for the region if it contains live data
1572     if (hr->next_marked_bytes() > 0) {
1573       set_bit_for_region(hr);
1574     }
1575 
1576     return false;
1577   }
1578 };
1579 
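     // Parallel task that applies the closure above to all heap regions
     // during the STW cleanup pause; it is driven from
     // ConcurrentMark::cleanup() below.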
1580 class G1ParFinalCountTask: public AbstractGangTask {
1581 protected:
1582   G1CollectedHeap* _g1h;
1583   ConcurrentMark* _cm;
1584   BitMap* _actual_region_bm;
1585   BitMap* _actual_card_bm;
1586 
1587   uint    _n_workers;
1588   HeapRegionClaimer _hrclaimer;
1589 
1590 public:
1591   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1592     : AbstractGangTask("G1 final counting"),
1593       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1594       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1595       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1596   }
1597 
1598   void work(uint worker_id) {
1599     assert(worker_id < _n_workers, "invariant");
1600 
1601     FinalCountDataUpdateClosure final_update_cl(_g1h,
1602                                                 _actual_region_bm,
1603                                                 _actual_card_bm);
1604 
1605     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1606   }
1607 };
1608 
1609 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1610   G1CollectedHeap* _g1;
1611   size_t _freed_bytes;
1612   FreeRegionList* _local_cleanup_list;
1613   HeapRegionSetCount _old_regions_removed;
1614   HeapRegionSetCount _humongous_regions_removed;
1615   HRRSCleanupTask* _hrrs_cleanup_task;
1616 
1617 public:
1618   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1619                              FreeRegionList* local_cleanup_list,
1620                              HRRSCleanupTask* hrrs_cleanup_task) :
1621     _g1(g1),
1622     _freed_bytes(0),
1623     _local_cleanup_list(local_cleanup_list),
1624     _old_regions_removed(),
1625     _humongous_regions_removed(),
1626     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1627 
1628   size_t freed_bytes() { return _freed_bytes; }
1629   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1630   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1631 
1632   bool doHeapRegion(HeapRegion *hr) {
1633     if (hr->is_archive()) {
1634       return false;
1635     }
1636     // Regions are claimed per worker via the HeapRegionClaimer in
1637     // G1ParNoteEndTask, so no per-region claim values are used here.
1638     _g1->reset_gc_time_stamps(hr);
1639     hr->note_end_of_marking();
1640 
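         // Free the region outright if it has allocated data but marking
         // found no live bytes in it (and it is not young); otherwise just
         // record its remembered set cleanup work.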
1641     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1642       _freed_bytes += hr->used();
1643       hr->set_containing_set(NULL);
1644       if (hr->is_humongous()) {
1645         _humongous_regions_removed.increment(1u, hr->capacity());
1646         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1647       } else {
1648         _old_regions_removed.increment(1u, hr->capacity());
1649         _g1->free_region(hr, _local_cleanup_list, true);
1650       }
1651     } else {
1652       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1653     }
1654 
1655     return false;
1656   }
1657 };
1658 
1659 class G1ParNoteEndTask: public AbstractGangTask {
1660   friend class G1NoteEndOfConcMarkClosure;
1661 
1662 protected:
1663   G1CollectedHeap* _g1h;
1664   FreeRegionList* _cleanup_list;
1665   HeapRegionClaimer _hrclaimer;
1666 
1667 public:
1668   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1669       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1670   }
1671 
1672   void work(uint worker_id) {
1673     FreeRegionList local_cleanup_list("Local Cleanup List");
1674     HRRSCleanupTask hrrs_cleanup_task;
1675     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1676                                            &hrrs_cleanup_task);
1677     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1678     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1679 
1680     // Now update the lists
1681     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1682     {
1683       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1684       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1685 
1686       // If we iterate over the global cleanup list at the end of
1687       // cleanup to do this printing we cannot guarantee that we only
1688       // generate output for the newly-reclaimed regions (the list
1689       // might not be empty at the beginning of cleanup; we might
1690       // still be working on its previous contents). So we do the
1691       // printing here, before we append the new regions to the global
1692       // cleanup list.
1693 
1694       G1HRPrinter* hr_printer = _g1h->hr_printer();
1695       if (hr_printer->is_active()) {
1696         FreeRegionListIterator iter(&local_cleanup_list);
1697         while (iter.more_available()) {
1698           HeapRegion* hr = iter.get_next();
1699           hr_printer->cleanup(hr);
1700         }
1701       }
1702 
1703       _cleanup_list->add_ordered(&local_cleanup_list);
1704       assert(local_cleanup_list.is_empty(), "post-condition");
1705 
1706       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1707     }
1708   }
1709 };
1710 
1711 class G1ParScrubRemSetTask: public AbstractGangTask {
1712 protected:
1713   G1RemSet* _g1rs;
1714   BitMap* _region_bm;
1715   BitMap* _card_bm;
1716   HeapRegionClaimer _hrclaimer;
1717 
1718 public:
1719   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1720       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1721   }
1722 
1723   void work(uint worker_id) {
1724     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1725   }
1726 
1727 };
1728 
1729 void ConcurrentMark::cleanup() {
1730   // world is stopped at this checkpoint
1731   assert(SafepointSynchronize::is_at_safepoint(),
1732          "world should be stopped");
1733   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1734 
1735   // If a full collection has happened, we shouldn't do this.
1736   if (has_aborted()) {
1737     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1738     return;
1739   }
1740 
1741   g1h->verify_region_sets_optional();
1742 
1743   if (VerifyDuringGC) {
1744     HandleMark hm;  // handle scope
1745     g1h->prepare_for_verify();
1746     Universe::verify(VerifyOption_G1UsePrevMarking,
1747                      " VerifyDuringGC:(before)");
1748   }
1749   g1h->check_bitmaps("Cleanup Start");
1750 
1751   G1CollectorPolicy* g1p = g1h->g1_policy();
1752   g1p->record_concurrent_mark_cleanup_start();
1753 
1754   double start = os::elapsedTime();
1755 
1756   HeapRegionRemSet::reset_for_cleanup_tasks();
1757 
1758   // Do counting once more with the world stopped for good measure.
1759   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1760 
1761   g1h->workers()->run_task(&g1_par_count_task);
1762 
1763   if (VerifyDuringGC) {
1764     // Verify that the counting data accumulated during marking matches
1765     // that calculated by walking the marking bitmap.
1766 
1767     // Bitmaps to hold expected values
1768     BitMap expected_region_bm(_region_bm.size(), true);
1769     BitMap expected_card_bm(_card_bm.size(), true);
1770 
1771     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1772                                                  &_region_bm,
1773                                                  &_card_bm,
1774                                                  &expected_region_bm,
1775                                                  &expected_card_bm);
1776 
1777     g1h->workers()->run_task(&g1_par_verify_task);
1778 
1779     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1780   }
1781 
1782   size_t start_used_bytes = g1h->used();
1783   g1h->collector_state()->set_mark_in_progress(false);
1784 
1785   double count_end = os::elapsedTime();
1786   double this_final_counting_time = (count_end - start);
1787   _total_counting_time += this_final_counting_time;
1788 
1789   if (G1PrintRegionLivenessInfo) {
1790     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1791     _g1h->heap_region_iterate(&cl);
1792   }
1793 
1794   // Install newly created mark bitMap as "prev".
1795   swapMarkBitMaps();
1796 
1797   g1h->reset_gc_time_stamp();
1798 
1799   uint n_workers = _g1h->workers()->active_workers();
1800 
1801   // Note end of marking in all heap regions.
1802   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1803   g1h->workers()->run_task(&g1_par_note_end_task);
1804   g1h->check_gc_time_stamps();
1805 
1806   if (!cleanup_list_is_empty()) {
1807     // The cleanup list is not empty, so we'll have to process it
1808     // concurrently. Notify anyone else that might be wanting free
1809     // regions that there will be more free regions coming soon.
1810     g1h->set_free_regions_coming();
1811   }
1812 
1813   // The remembered set scrubbing below has to happen before the
1814   // record_concurrent_mark_cleanup_end() call further down, since it
       // affects the metric by which we sort the heap regions.
1815   if (G1ScrubRemSets) {
1816     double rs_scrub_start = os::elapsedTime();
1817     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1818     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1819 
1820     double rs_scrub_end = os::elapsedTime();
1821     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1822     _total_rs_scrub_time += this_rs_scrub_time;
1823   }
1824 
1825   // this will also free any regions totally full of garbage objects,
1826   // and sort the regions.
1827   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1828 
1829   // Statistics.
1830   double end = os::elapsedTime();
1831   _cleanup_times.add((end - start) * 1000.0);
1832 
1833   if (G1Log::fine()) {
1834     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1835   }
1836 
1837   // Clean up will have freed any regions completely full of garbage.
1838   // Update the soft reference policy with the new heap occupancy.
1839   Universe::update_heap_info_at_gc();
1840 
1841   if (VerifyDuringGC) {
1842     HandleMark hm;  // handle scope
1843     g1h->prepare_for_verify();
1844     Universe::verify(VerifyOption_G1UsePrevMarking,
1845                      " VerifyDuringGC:(after)");
1846   }
1847 
1848   g1h->check_bitmaps("Cleanup End");
1849 
1850   g1h->verify_region_sets_optional();
1851 
1852   // We need to make this be a "collection" so any collection pause that
1853   // races with it goes around and waits for completeCleanup to finish.
1854   g1h->increment_total_collections();
1855 
1856   // Clean out dead classes and update Metaspace sizes.
1857   if (ClassUnloadingWithConcurrentMark) {
1858     ClassLoaderDataGraph::purge();
1859   }
1860   MetaspaceGC::compute_new_size();
1861 
1862   // We reclaimed old regions so we should calculate the sizes to make
1863   // sure we update the old gen/space data.
1864   g1h->g1mm()->update_sizes();
1865   g1h->allocation_context_stats().update_after_mark();
1866 
1867   g1h->trace_heap_after_concurrent_cycle();
1868 }
1869 
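     // The regions queued on _cleanup_list by the cleanup pause above are
     // cleared and handed over to the secondary free list here, concurrently,
     // by the concurrent mark thread.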
1870 void ConcurrentMark::completeCleanup() {
1871   if (has_aborted()) return;
1872 
1873   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1874 
1875   _cleanup_list.verify_optional();
1876   FreeRegionList tmp_free_list("Tmp Free List");
1877 
1878   if (G1ConcRegionFreeingVerbose) {
1879     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1880                            "cleanup list has %u entries",
1881                            _cleanup_list.length());
1882   }
1883 
1884   // No one else should be accessing the _cleanup_list at this point,
1885   // so it is not necessary to take any locks
1886   while (!_cleanup_list.is_empty()) {
1887     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1888     assert(hr != NULL, "Got NULL from a non-empty list");
1889     hr->par_clear();
1890     tmp_free_list.add_ordered(hr);
1891 
1892     // Instead of adding one region at a time to the secondary_free_list,
1893     // we accumulate them in the local list and move them a few at a
1894     // time. This also cuts down on the number of notify_all() calls
1895     // we do during this process. We'll also append the local list when
1896     // _cleanup_list is empty (which means we just removed the last
1897     // region from the _cleanup_list).
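         // For example, with G1SecondaryFreeListAppendLength == 5 the
         // hand-over below happens after the 5th, 10th, ... cleared region,
         // and once more for any final partial batch when the cleanup list
         // drains.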
1898     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1899         _cleanup_list.is_empty()) {
1900       if (G1ConcRegionFreeingVerbose) {
1901         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1902                                "appending %u entries to the secondary_free_list, "
1903                                "cleanup list still has %u entries",
1904                                tmp_free_list.length(),
1905                                _cleanup_list.length());
1906       }
1907 
1908       {
1909         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1910         g1h->secondary_free_list_add(&tmp_free_list);
1911         SecondaryFreeList_lock->notify_all();
1912       }
1913 #ifndef PRODUCT
1914       if (G1StressConcRegionFreeing) {
1915         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1916           os::sleep(Thread::current(), (jlong) 1, false);
1917         }
1918       }
1919 #endif
1920     }
1921   }
1922   assert(tmp_free_list.is_empty(), "post-condition");
1923 }
1924 
1925 // Supporting Object and Oop closures for reference discovery
1926 // and processing during marking
1927 
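     // A NULL reference is not considered live. An address outside the G1
     // reserved heap is always treated as live; otherwise the object is live
     // unless is_obj_ill() reports it dead with respect to the marking in
     // progress (i.e. allocated before NTAMS and not marked on the next bitmap).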
1928 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1929   HeapWord* addr = (HeapWord*)obj;
1930   return addr != NULL &&
1931          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1932 }
1933 
1934 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1935 // Uses the CMTask associated with a worker thread (for serial reference
1936 // processing the CMTask for worker 0 is used) to preserve (mark) and
1937 // trace referent objects.
1938 //
1939 // Using the CMTask and embedded local queues avoids having the worker
1940 // threads operating on the global mark stack. This reduces the risk
1941 // of overflowing the stack - which we would rather avoid at this late
1942 // stage. Also using the tasks' local queues removes the potential
1943 // for the workers to interfere with each other, which could occur if
1944 // they operated on the global stack.
1945 
1946 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1947   ConcurrentMark* _cm;
1948   CMTask*         _task;
1949   int             _ref_counter_limit;
1950   int             _ref_counter;
1951   bool            _is_serial;
1952  public:
1953   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1954     _cm(cm), _task(task), _is_serial(is_serial),
1955     _ref_counter_limit(G1RefProcDrainInterval) {
1956     assert(_ref_counter_limit > 0, "sanity");
1957     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1958     _ref_counter = _ref_counter_limit;
1959   }
1960 
1961   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1962   virtual void do_oop(      oop* p) { do_oop_work(p); }
1963 
1964   template <class T> void do_oop_work(T* p) {
1965     if (!_cm->has_overflown()) {
1966       oop obj = oopDesc::load_decode_heap_oop(p);
1967       _task->deal_with_reference(obj);
1968       _ref_counter--;
1969 
1970       if (_ref_counter == 0) {
1971         // We have dealt with _ref_counter_limit references, pushing them
1972         // and objects reachable from them on to the local stack (and
1973         // possibly the global stack). Call CMTask::do_marking_step() to
1974         // process these entries.
1975         //
1976         // We call CMTask::do_marking_step() in a loop, which we'll exit if
1977         // there's nothing more to do (i.e. we're done with the entries that
1978         // were pushed as a result of the CMTask::deal_with_reference() calls
1979         // above) or we overflow.
1980         //
1981         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1982         // flag while there may still be some work to do. (See the comment at
1983         // the beginning of CMTask::do_marking_step() for those conditions -
1984         // one of which is reaching the specified time target.) It is only
1985         // when CMTask::do_marking_step() returns without setting the
1986         // has_aborted() flag that the marking step has completed.
1987         do {
1988           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1989           _task->do_marking_step(mark_step_duration_ms,
1990                                  false      /* do_termination */,
1991                                  _is_serial);
1992         } while (_task->has_aborted() && !_cm->has_overflown());
1993         _ref_counter = _ref_counter_limit;
1994       }
1995     }
1996   }
1997 };
1998 
1999 // 'Drain' oop closure used by both serial and parallel reference processing.
2000 // Uses the CMTask associated with a given worker thread (for serial
2001 // reference processing the CMTask for worker 0 is used). Calls the
2002 // do_marking_step routine, with an unbelievably large timeout value,
2003 // to drain the marking data structures of the remaining entries
2004 // added by the 'keep alive' oop closure above.
2005 
2006 class G1CMDrainMarkingStackClosure: public VoidClosure {
2007   ConcurrentMark* _cm;
2008   CMTask*         _task;
2009   bool            _is_serial;
2010  public:
2011   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2012     _cm(cm), _task(task), _is_serial(is_serial) {
2013     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2014   }
2015 
2016   void do_void() {
2017     do {
2018       // We call CMTask::do_marking_step() to completely drain the local
2019       // and global marking stacks of entries pushed by the 'keep alive'
2020       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2021       //
2022       // CMTask::do_marking_step() is called in a loop, which we'll exit
2023       // if there's nothing more to do (i.e. we've completely drained the
2024       // entries that were pushed as a result of applying the 'keep alive'
2025       // closure to the entries on the discovered ref lists) or we overflow
2026       // the global marking stack.
2027       //
2028       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2029       // flag while there may still be some work to do. (See the comment at
2030       // the beginning of CMTask::do_marking_step() for those conditions -
2031       // one of which is reaching the specified time target.) It is only
2032       // when CMTask::do_marking_step() returns without setting the
2033       // has_aborted() flag that the marking step has completed.
2034 
2035       _task->do_marking_step(1000000000.0 /* something very large */,
2036                              true         /* do_termination */,
2037                              _is_serial);
2038     } while (_task->has_aborted() && !_cm->has_overflown());
2039   }
2040 };
2041 
2042 // Implementation of AbstractRefProcTaskExecutor for parallel
2043 // reference processing at the end of G1 concurrent marking
2044 
2045 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2046 private:
2047   G1CollectedHeap* _g1h;
2048   ConcurrentMark*  _cm;
2049   WorkGang*        _workers;
2050   uint             _active_workers;
2051 
2052 public:
2053   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2054                           ConcurrentMark* cm,
2055                           WorkGang* workers,
2056                           uint n_workers) :
2057     _g1h(g1h), _cm(cm),
2058     _workers(workers), _active_workers(n_workers) { }
2059 
2060   // Executes the given task using concurrent marking worker threads.
2061   virtual void execute(ProcessTask& task);
2062   virtual void execute(EnqueueTask& task);
2063 };
2064 
2065 class G1CMRefProcTaskProxy: public AbstractGangTask {
2066   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2067   ProcessTask&     _proc_task;
2068   G1CollectedHeap* _g1h;
2069   ConcurrentMark*  _cm;
2070 
2071 public:
2072   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2073                      G1CollectedHeap* g1h,
2074                      ConcurrentMark* cm) :
2075     AbstractGangTask("Process reference objects in parallel"),
2076     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2077     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2078     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2079   }
2080 
2081   virtual void work(uint worker_id) {
2082     ResourceMark rm;
2083     HandleMark hm;
2084     CMTask* task = _cm->task(worker_id);
2085     G1CMIsAliveClosure g1_is_alive(_g1h);
2086     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2087     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2088 
2089     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2090   }
2091 };
2092 
2093 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2094   assert(_workers != NULL, "Need parallel worker threads.");
2095   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2096 
2097   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2098 
2099   // We need to reset the concurrency level before each
2100   // proxy task execution, so that the termination protocol
2101   // and overflow handling in CMTask::do_marking_step() knows
2102   // how many workers to wait for.
2103   _cm->set_concurrency(_active_workers);
2104   _workers->run_task(&proc_task_proxy);
2105 }
2106 
2107 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2108   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2109   EnqueueTask& _enq_task;
2110 
2111 public:
2112   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2113     AbstractGangTask("Enqueue reference objects in parallel"),
2114     _enq_task(enq_task) { }
2115 
2116   virtual void work(uint worker_id) {
2117     _enq_task.work(worker_id);
2118   }
2119 };
2120 
2121 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2122   assert(_workers != NULL, "Need parallel worker threads.");
2123   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2124 
2125   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2126 
2127   // Not strictly necessary but...
2128   //
2129   // We need to reset the concurrency level before each
2130   // proxy task execution, so that the termination protocol
2131   // and overflow handling in CMTask::do_marking_step() knows
2132   // how many workers to wait for.
2133   _cm->set_concurrency(_active_workers);
2134   _workers->run_task(&enq_task_proxy);
2135 }
2136 
2137 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2138   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2139 }
2140 
2141 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2142   if (has_overflown()) {
2143     // Skip processing the discovered references if we have
2144     // overflown the global marking stack. Reference objects
2145     // only get discovered once so it is OK to not
2146     // de-populate the discovered reference lists. We could have,
2147     // but the only benefit would be that, when marking restarts,
2148     // fewer reference objects are discovered.
2149     return;
2150   }
2151 
2152   ResourceMark rm;
2153   HandleMark   hm;
2154 
2155   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2156 
2157   // Is alive closure.
2158   G1CMIsAliveClosure g1_is_alive(g1h);
2159 
2160   // Inner scope to exclude the cleaning of the string and symbol
2161   // tables from the displayed time.
2162   {
2163     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2164 
2165     ReferenceProcessor* rp = g1h->ref_processor_cm();
2166 
2167     // See the comment in G1CollectedHeap::ref_processing_init()
2168     // about how reference processing currently works in G1.
2169 
2170     // Set the soft reference policy
2171     rp->setup_policy(clear_all_soft_refs);
2172     assert(_markStack.isEmpty(), "mark stack should be empty");
2173 
2174     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2175     // in serial reference processing. Note these closures are also
2176     // used for serially processing (by the current thread) the
2177     // JNI references during parallel reference processing.
2178     //
2179     // These closures do not need to synchronize with the worker
2180     // threads involved in parallel reference processing as these
2181     // instances are executed serially by the current thread (e.g.
2182     // reference processing is not multi-threaded and is thus
2183     // performed by the current thread instead of a gang worker).
2184     //
2185     // The gang tasks involved in parallel reference processing create
2186     // their own instances of these closures, which do their own
2187     // synchronization among themselves.
2188     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2189     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2190 
2191     // We need at least one active thread. If reference processing
2192     // is not multi-threaded we use the current (VMThread) thread,
2193     // otherwise we use the work gang from the G1CollectedHeap and
2194     // we utilize all the worker threads we can.
2195     bool processing_is_mt = rp->processing_is_mt();
2196     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2197     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2198 
2199     // Parallel processing task executor.
2200     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2201                                               g1h->workers(), active_workers);
2202     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2203 
2204     // Set the concurrency level. The phase was already set prior to
2205     // executing the remark task.
2206     set_concurrency(active_workers);
2207 
2208     // Set the degree of MT processing here.  If the discovery was done MT,
2209     // the number of threads involved during discovery could differ from
2210     // the number of active workers.  This is OK as long as the discovered
2211     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2212     rp->set_active_mt_degree(active_workers);
2213 
2214     // Process the weak references.
2215     const ReferenceProcessorStats& stats =
2216         rp->process_discovered_references(&g1_is_alive,
2217                                           &g1_keep_alive,
2218                                           &g1_drain_mark_stack,
2219                                           executor,
2220                                           g1h->gc_timer_cm());
2221     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2222 
2223     // The do_oop work routines of the keep_alive and drain_marking_stack
2224     // oop closures will set the has_overflown flag if we overflow the
2225     // global marking stack.
2226 
2227     assert(_markStack.overflow() || _markStack.isEmpty(),
2228             "mark stack should be empty (unless it overflowed)");
2229 
2230     if (_markStack.overflow()) {
2231       // This should have been done already when we tried to push an
2232       // entry on to the global mark stack. But let's do it again.
2233       set_has_overflown();
2234     }
2235 
2236     assert(rp->num_q() == active_workers, "why not");
2237 
2238     rp->enqueue_discovered_references(executor);
2239 
2240     rp->verify_no_references_recorded();
2241     assert(!rp->discovery_enabled(), "Post condition");
2242   }
2243 
2244   if (has_overflown()) {
2245     // We can not trust g1_is_alive if the marking stack overflowed
2246     return;
2247   }
2248 
2249   assert(_markStack.isEmpty(), "Marking should have completed");
2250 
2251   // Unload Klasses, String, Symbols, Code Cache, etc.
2252   {
2253     G1CMTraceTime trace("Unloading", G1Log::finer());
2254 
2255     if (ClassUnloadingWithConcurrentMark) {
2256       bool purged_classes;
2257 
2258       {
2259         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2260         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2261       }
2262 
2263       {
2264         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2265         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2266       }
2267     }
2268 
2269     if (G1StringDedup::is_enabled()) {
2270       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2271       G1StringDedup::unlink(&g1_is_alive);
2272     }
2273   }
2274 }
2275 
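     // Called during the cleanup pause: the 'next' bitmap, which now holds
     // the results of the marking that just completed, becomes the new 'prev'
     // bitmap, while the old 'prev' bitmap is recycled as 'next' for the
     // following cycle (it is cleared before that cycle starts marking).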
2276 void ConcurrentMark::swapMarkBitMaps() {
2277   CMBitMapRO* temp = _prevMarkBitMap;
2278   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2279   _nextMarkBitMap  = (CMBitMap*)  temp;
2280 }
2281 
2282 // Closure for marking entries in SATB buffers.
2283 class CMSATBBufferClosure : public SATBBufferClosure {
2284 private:
2285   CMTask* _task;
2286   G1CollectedHeap* _g1h;
2287 
2288   // This is very similar to CMTask::deal_with_reference, but with
2289   // more relaxed requirements for the argument, so this must be more
2290   // circumspect about treating the argument as an object.
2291   void do_entry(void* entry) const {
2292     _task->increment_refs_reached();
2293     HeapRegion* hr = _g1h->heap_region_containing(entry);
2294     if (entry < hr->next_top_at_mark_start()) {
2295       // Until we get here, we don't know whether entry refers to a valid
2296       // object; it could instead have been a stale reference.
2297       oop obj = static_cast<oop>(entry);
2298       assert(obj->is_oop(true /* ignore mark word */),
2299              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2300       _task->make_reference_grey(obj, hr);
2301     }
2302   }
2303 
2304 public:
2305   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2306     : _task(task), _g1h(g1h) { }
2307 
2308   virtual void do_buffer(void** buffer, size_t size) {
2309     for (size_t i = 0; i < size; ++i) {
2310       do_entry(buffer[i]);
2311     }
2312   }
2313 };
2314 
2315 class G1RemarkThreadsClosure : public ThreadClosure {
2316   CMSATBBufferClosure _cm_satb_cl;
2317   G1CMOopClosure _cm_cl;
2318   MarkingCodeBlobClosure _code_cl;
2319   int _thread_parity;
2320 
2321  public:
2322   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2323     _cm_satb_cl(task, g1h),
2324     _cm_cl(g1h, g1h->concurrent_mark(), task),
2325     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2326     _thread_parity(Threads::thread_claim_parity()) {}
2327 
2328   void do_thread(Thread* thread) {
2329     if (thread->is_Java_thread()) {
2330       if (thread->claim_oops_do(true, _thread_parity)) {
2331         JavaThread* jt = (JavaThread*)thread;
2332 
2333         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2334         // however, the oops reachable from nmethods have very complex lifecycles:
2335         // * Alive if on the stack of an executing method
2336         // * Weakly reachable otherwise
2337         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2338         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2339         jt->nmethods_do(&_code_cl);
2340 
2341         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2342       }
2343     } else if (thread->is_VM_thread()) {
2344       if (thread->claim_oops_do(true, _thread_parity)) {
2345         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2346       }
2347     }
2348   }
2349 };
2350 
2351 class CMRemarkTask: public AbstractGangTask {
2352 private:
2353   ConcurrentMark* _cm;
2354 public:
2355   void work(uint worker_id) {
2356     // Since all available tasks are actually started, we should
2357     // only proceed if we're supposed to be active.
2358     if (worker_id < _cm->active_tasks()) {
2359       CMTask* task = _cm->task(worker_id);
2360       task->record_start_time();
2361       {
2362         ResourceMark rm;
2363         HandleMark hm;
2364 
2365         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2366         Threads::threads_do(&threads_f);
2367       }
2368 
2369       do {
2370         task->do_marking_step(1000000000.0 /* something very large */,
2371                               true         /* do_termination       */,
2372                               false        /* is_serial            */);
2373       } while (task->has_aborted() && !_cm->has_overflown());
2374       // If we overflow, then we do not want to restart. We instead
2375       // want to abort remark and do concurrent marking again.
2376       task->record_end_time();
2377     }
2378   }
2379 
2380   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2381     AbstractGangTask("Par Remark"), _cm(cm) {
2382     _cm->terminator()->reset_for_reuse(active_workers);
2383   }
2384 };
2385 
2386 void ConcurrentMark::checkpointRootsFinalWork() {
2387   ResourceMark rm;
2388   HandleMark   hm;
2389   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2390 
2391   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2392 
2393   g1h->ensure_parsability(false);
2394 
2395   // this is remark, so we'll use up all active threads
2396   uint active_workers = g1h->workers()->active_workers();
2397   set_concurrency_and_phase(active_workers, false /* concurrent */);
2398   // Leave _parallel_marking_threads at its
2399   // value originally calculated in the ConcurrentMark
2400   // constructor and pass the number of active workers
2401   // through the gang in the task.
2402 
2403   {
2404     StrongRootsScope srs(active_workers);
2405 
2406     CMRemarkTask remarkTask(this, active_workers);
2407     // We will start all available threads, even if we decide that the
2408     // active_workers will be fewer. The extra ones will just bail out
2409     // immediately.
2410     g1h->workers()->run_task(&remarkTask);
2411   }
2412 
2413   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2414   guarantee(has_overflown() ||
2415             satb_mq_set.completed_buffers_num() == 0,
2416             "Invariant: has_overflown = %s, num buffers = %d",
2417             BOOL_TO_STR(has_overflown()),
2418             satb_mq_set.completed_buffers_num());
2419 
2420   print_stats();
2421 }
2422 
2423 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2424   // Note we are overriding the read-only view of the prev map here, via
2425   // the cast.
2426   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2427 }
2428 
2429 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2430   _nextMarkBitMap->clearRange(mr);
2431 }
2432 
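     // Claims the next region to scan for the given worker by advancing the
     // global finger with a CAS. A rough sketch of the caller side (see
     // CMTask::do_marking_step()) - keep retrying while regions may remain:
     //
     //   HeapRegion* r = NULL;
     //   while (r == NULL && !_cm->out_of_regions()) {
     //     r = _cm->claim_region(worker_id);
     //   }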
2433 HeapRegion*
2434 ConcurrentMark::claim_region(uint worker_id) {
2435   // "checkpoint" the finger
2436   HeapWord* finger = _finger;
2437 
2438   // _heap_end will not change underneath our feet; it only changes at
2439   // yield points.
2440   while (finger < _heap_end) {
2441     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2442 
2443     // Note on how this code handles humongous regions. In the
2444     // normal case the finger will reach the start of a "starts
2445     // humongous" (SH) region. Its end will either be the end of the
2446     // last "continues humongous" (CH) region in the sequence, or the
2447     // standard end of the SH region (if the SH is the only region in
2448     // the sequence). That way claim_region() will skip over the CH
2449     // regions. However, there is a subtle race between a CM thread
2450     // executing this method and a mutator thread doing a humongous
2451     // object allocation. The two are not mutually exclusive as the CM
2452     // thread does not need to hold the Heap_lock when it gets
2453     // here. So there is a chance that claim_region() will come across
2454     // a free region that's in the process of becoming a SH or a CH
2455     // region. In the former case, it will either
2456     //   a) Miss the update to the region's end, in which case it will
2457     //      visit every subsequent CH region, will find their bitmaps
2458     //      empty, and do nothing, or
2459     //   b) Will observe the update of the region's end (in which case
2460     //      it will skip the subsequent CH regions).
2461     // If it comes across a region that suddenly becomes CH, the
2462     // scenario will be similar to b). So, the race between
2463     // claim_region() and a humongous object allocation might force us
2464     // to do a bit of unnecessary work (due to some unnecessary bitmap
2465     // iterations) but it should not introduce any correctness issues.
2466     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2467 
2468     // heap_region_containing() above may return NULL as we always scan and
2469     // claim up to the end of the heap. In this case, just jump to the next region.
2470     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2471 
2472     // Is the gap between reading the finger and doing the CAS too long?
2473     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2474     if (res == finger && curr_region != NULL) {
2475       // we succeeded
2476       HeapWord*   bottom        = curr_region->bottom();
2477       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2478 
2479       // notice that _finger == end cannot be guaranteed here, since
2480       // someone else might have moved the finger even further
2481       assert(_finger >= end, "the finger should have moved forward");
2482 
2483       if (limit > bottom) {
2484         return curr_region;
2485       } else {
2486         assert(limit == bottom,
2487                "the region limit should be at bottom");
2488         // we return NULL and the caller should try calling
2489         // claim_region() again.
2490         return NULL;
2491       }
2492     } else {
2493       assert(_finger > finger, "the finger should have moved forward");
2494       // read it again
2495       finger = _finger;
2496     }
2497   }
2498 
2499   return NULL;
2500 }
2501 
2502 #ifndef PRODUCT
2503 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2504 private:
2505   G1CollectedHeap* _g1h;
2506   const char* _phase;
2507   int _info;
2508 
2509 public:
2510   VerifyNoCSetOops(const char* phase, int info = -1) :
2511     _g1h(G1CollectedHeap::heap()),
2512     _phase(phase),
2513     _info(info)
2514   { }
2515 
2516   void operator()(oop obj) const {
2517     guarantee(obj->is_oop(),
2518               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2519               p2i(obj), _phase, _info);
2520     guarantee(!_g1h->obj_in_cs(obj),
2521               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2522               p2i(obj), _phase, _info);
2523   }
2524 };
2525 
2526 void ConcurrentMark::verify_no_cset_oops() {
2527   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2528   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2529     return;
2530   }
2531 
2532   // Verify entries on the global mark stack
2533   _markStack.iterate(VerifyNoCSetOops("Stack"));
2534 
2535   // Verify entries on the task queues
2536   for (uint i = 0; i < _max_worker_id; ++i) {
2537     CMTaskQueue* queue = _task_queues->queue(i);
2538     queue->iterate(VerifyNoCSetOops("Queue", i));
2539   }
2540 
2541   // Verify the global finger
2542   HeapWord* global_finger = finger();
2543   if (global_finger != NULL && global_finger < _heap_end) {
2544     // The global finger always points to a heap region boundary. We
2545     // use heap_region_containing() to get the containing region
2546     // given that the global finger could be pointing to a free region
2547     // which subsequently becomes a continues humongous region. If that
2548     // happens, heap_region_containing() will return the corresponding
2549     // starts humongous region, whose bottom() differs from the finger,
2550     // and the check below will not hold any more.
2551     // Since we always iterate over all regions, we might get a NULL HeapRegion
2552     // here.
2553     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2554     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2555               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2556               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2557   }
2558 
2559   // Verify the task fingers
2560   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2561   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2562     CMTask* task = _tasks[i];
2563     HeapWord* task_finger = task->finger();
2564     if (task_finger != NULL && task_finger < _heap_end) {
2565       // See above note on the global finger verification.
2566       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2567       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2568                 !task_hr->in_collection_set(),
2569                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2570                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2571     }
2572   }
2573 }
2574 #endif // PRODUCT
2575 
2576 // Aggregate the counting data that was constructed concurrently
2577 // with marking.
2578 class AggregateCountDataHRClosure: public HeapRegionClosure {
2579   G1CollectedHeap* _g1h;
2580   ConcurrentMark* _cm;
2581   CardTableModRefBS* _ct_bs;
2582   BitMap* _cm_card_bm;
2583   uint _max_worker_id;
2584 
2585  public:
2586   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2587                               BitMap* cm_card_bm,
2588                               uint max_worker_id) :
2589     _g1h(g1h), _cm(g1h->concurrent_mark()),
2590     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2591     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2592 
2593   bool doHeapRegion(HeapRegion* hr) {
2594     HeapWord* start = hr->bottom();
2595     HeapWord* limit = hr->next_top_at_mark_start();
2596     HeapWord* end = hr->end();
2597 
2598     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2599            "Preconditions not met - "
2600            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2601            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2602            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2603 
2604     assert(hr->next_marked_bytes() == 0, "Precondition");
2605 
2606     if (start == limit) {
2607       // NTAMS of this region has not been set so nothing to do.
2608       return false;
2609     }
2610 
2611     // 'start' should be in the heap.
2612     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2613     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2614     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2615 
2616     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2617     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2618     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2619 
2620     // If ntams is not card aligned then we bump the card bitmap index
2621     // for limit so that we get all the cards spanned by
2622     // the object ending at ntams.
2623     // Note: if this is the last region in the heap then ntams
2624     // could actually be just beyond the end of the heap;
2625     // limit_idx will then correspond to a (non-existent) card
2626     // that is also outside the heap.
2627     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2628       limit_idx += 1;
2629     }
2630 
2631     assert(limit_idx <= end_idx, "or else use atomics");
2632 
2633     // Aggregate the "stripe" in the count data associated with hr.
2634     uint hrm_index = hr->hrm_index();
2635     size_t marked_bytes = 0;
2636 
2637     for (uint i = 0; i < _max_worker_id; i += 1) {
2638       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2639       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2640 
2641       // Fetch the marked_bytes in this region for task i and
2642       // add it to the running total for this region.
2643       marked_bytes += marked_bytes_array[hrm_index];
2644 
2645       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2646       // into the global card bitmap.
2647       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2648 
2649       while (scan_idx < limit_idx) {
2650         assert(task_card_bm->at(scan_idx) == true, "should be");
2651         _cm_card_bm->set_bit(scan_idx);
2652         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2653 
2654         // BitMap::get_next_one_offset() can handle the case when
2655         // its left_offset parameter is greater than its right_offset
2656         // parameter. It does, however, have an early exit if
2657         // left_offset == right_offset. So let's limit the value
2658         // passed in for left offset here.
2659         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2660         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2661       }
2662     }
2663 
2664     // Update the marked bytes for this region.
2665     hr->add_to_marked_bytes(marked_bytes);
2666 
2667     // Next heap region
2668     return false;
2669   }
2670 };
2671 
2672 class G1AggregateCountDataTask: public AbstractGangTask {
2673 protected:
2674   G1CollectedHeap* _g1h;
2675   ConcurrentMark* _cm;
2676   BitMap* _cm_card_bm;
2677   uint _max_worker_id;
2678   uint _active_workers;
2679   HeapRegionClaimer _hrclaimer;
2680 
2681 public:
2682   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2683                            ConcurrentMark* cm,
2684                            BitMap* cm_card_bm,
2685                            uint max_worker_id,
2686                            uint n_workers) :
2687       AbstractGangTask("Count Aggregation"),
2688       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2689       _max_worker_id(max_worker_id),
2690       _active_workers(n_workers),
2691       _hrclaimer(_active_workers) {
2692   }
2693 
2694   void work(uint worker_id) {
2695     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2696 
2697     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2698   }
2699 };
2700 
2701 
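     // Folds the counting data gathered concurrently by each worker into the
     // global card bitmap and the per-region marked bytes. This runs during
     // the remark pause, before the cleanup pause consumes (and optionally
     // verifies) the data.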
2702 void ConcurrentMark::aggregate_count_data() {
2703   uint n_workers = _g1h->workers()->active_workers();
2704 
2705   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2706                                            _max_worker_id, n_workers);
2707 
2708   _g1h->workers()->run_task(&g1_par_agg_task);
2709 }
2710 
2711 // Clear the per-worker arrays used to store the per-region counting data
2712 void ConcurrentMark::clear_all_count_data() {
2713   // Clear the global card bitmap - it will be filled during
2714   // liveness count aggregation (during remark) and the
2715   // final counting task.
2716   _card_bm.clear();
2717 
2718   // Clear the global region bitmap - it will be filled as part
2719   // of the final counting task.
2720   _region_bm.clear();
2721 
2722   uint max_regions = _g1h->max_regions();
2723   assert(_max_worker_id > 0, "uninitialized");
2724 
2725   for (uint i = 0; i < _max_worker_id; i += 1) {
2726     BitMap* task_card_bm = count_card_bitmap_for(i);
2727     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2728 
2729     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2730     assert(marked_bytes_array != NULL, "uninitialized");
2731 
2732     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2733     task_card_bm->clear();
2734   }
2735 }
2736 
2737 void ConcurrentMark::print_stats() {
2738   if (G1MarkingVerboseLevel > 0) {
2739     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2740     for (size_t i = 0; i < _active_tasks; ++i) {
2741       _tasks[i]->print_stats();
2742       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2743     }
2744   }
2745 }
2746 
2747 // abandon current marking iteration due to a Full GC
2748 void ConcurrentMark::abort() {
2749   if (!cmThread()->during_cycle() || _has_aborted) {
2750     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2751     return;
2752   }
2753 
2754   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2755   // concurrent bitmap clearing.
2756   _nextMarkBitMap->clearAll();
2757 
2758   // Note we cannot clear the previous marking bitmap here
2759   // since VerifyDuringGC verifies the objects marked during
2760   // a full GC against the previous bitmap.
2761 
2762   // Clear the liveness counting data
2763   clear_all_count_data();
2764   // Empty mark stack
2765   reset_marking_state();
2766   for (uint i = 0; i < _max_worker_id; ++i) {
2767     _tasks[i]->clear_region_fields();
2768   }
2769   _first_overflow_barrier_sync.abort();
2770   _second_overflow_barrier_sync.abort();
2771   _has_aborted = true;
2772 
2773   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2774   satb_mq_set.abandon_partial_marking();
2775   // This can be called either during or outside marking; we'll read
2776   // the expected_active value from the SATB queue set.
2777   satb_mq_set.set_active_all_threads(
2778                                  false, /* new active value */
2779                                  satb_mq_set.is_active() /* expected_active */);
2780 
2781   _g1h->trace_heap_after_concurrent_cycle();
2782   _g1h->register_concurrent_cycle_end();
2783 }
2784 
2785 static void print_ms_time_info(const char* prefix, const char* name,
2786                                NumberSeq& ns) {
2787   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2788                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2789   if (ns.num() > 0) {
2790     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2791                            prefix, ns.sd(), ns.maximum());
2792   }
2793 }
2794 
2795 void ConcurrentMark::print_summary_info() {
2796   gclog_or_tty->print_cr(" Concurrent marking:");
2797   print_ms_time_info("  ", "init marks", _init_times);
2798   print_ms_time_info("  ", "remarks", _remark_times);
2799   {
2800     print_ms_time_info("     ", "final marks", _remark_mark_times);
2801     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2802 
2803   }
2804   print_ms_time_info("  ", "cleanups", _cleanup_times);
2805   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2806                          _total_counting_time,
2807                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2808                           (double)_cleanup_times.num()
2809                          : 0.0));
2810   if (G1ScrubRemSets) {
2811     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2812                            _total_rs_scrub_time,
2813                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2814                             (double)_cleanup_times.num()
2815                            : 0.0));
2816   }
2817   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2818                          (_init_times.sum() + _remark_times.sum() +
2819                           _cleanup_times.sum())/1000.0);
2820   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2821                 "(%8.2f s marking).",
2822                 cmThread()->vtime_accum(),
2823                 cmThread()->vtime_mark_accum());
2824 }
2825 
2826 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2827   _parallel_workers->print_worker_threads_on(st);
2828 }
2829 
2830 void ConcurrentMark::print_on_error(outputStream* st) const {
2831   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2832       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2833   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2834   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2835 }
2836 
2837 // We take a break if someone is trying to stop the world.
2838 bool ConcurrentMark::do_yield_check(uint worker_id) {
2839   if (SuspendibleThreadSet::should_yield()) {
2840     if (worker_id == 0) {
2841       _g1h->g1_policy()->record_concurrent_pause();
2842     }
2843     SuspendibleThreadSet::yield();
2844     return true;
2845   } else {
2846     return false;
2847   }
2848 }
2849 
2850 // Closure for iteration over bitmaps
2851 class CMBitMapClosure : public BitMapClosure {
2852 private:
2853   // the bitmap that is being iterated over
2854   CMBitMap*                   _nextMarkBitMap;
2855   ConcurrentMark*             _cm;
2856   CMTask*                     _task;
2857 
2858 public:
2859   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2860     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2861 
2862   bool do_bit(size_t offset) {
2863     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2864     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2865     assert(addr < _cm->finger(), "invariant");
2866     assert(addr >= _task->finger(), "invariant");
2867 
2868     // We move the task's local finger along.
2869     _task->move_finger_to(addr);
2870 
2871     _task->scan_object(oop(addr));
2872     // we only partially drain the local queue and global stack
2873     _task->drain_local_queue(true);
2874     _task->drain_global_stack(true);
2875 
2876     // if the has_aborted flag has been raised, we need to bail out of
2877     // the iteration
2878     return !_task->has_aborted();
2879   }
2880 };
2881 
2882 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2883   ReferenceProcessor* result = NULL;
2884   if (G1UseConcMarkReferenceProcessing) {
2885     result = g1h->ref_processor_cm();
2886     assert(result != NULL, "should not be NULL");
2887   }
2888   return result;
2889 }
2890 
2891 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2892                                ConcurrentMark* cm,
2893                                CMTask* task)
2894   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2895     _g1h(g1h), _cm(cm), _task(task)
2896 { }
2897 
2898 void CMTask::setup_for_region(HeapRegion* hr) {
2899   assert(hr != NULL,
2900         "claim_region() should have filtered out NULL regions");
2901   _curr_region  = hr;
2902   _finger       = hr->bottom();
2903   update_region_limit();
2904 }
2905 
2906 void CMTask::update_region_limit() {
2907   HeapRegion* hr            = _curr_region;
2908   HeapWord* bottom          = hr->bottom();
2909   HeapWord* limit           = hr->next_top_at_mark_start();
2910 
2911   if (limit == bottom) {
2912     // The region was collected underneath our feet.
2913     // We set the finger to bottom to ensure that the bitmap
2914     // iteration that will follow this will not do anything.
2915     // (this is not a condition that holds when we set the region up,
2916     // as the region is not supposed to be empty in the first place)
2917     _finger = bottom;
2918   } else if (limit >= _region_limit) {
2919     assert(limit >= _finger, "peace of mind");
2920   } else {
2921     assert(limit < _region_limit, "only way to get here");
2922     // This can happen under some pretty unusual circumstances.  An
2923     // evacuation pause empties the region underneath our feet (NTAMS
2924     // at bottom). We then do some allocation in the region (NTAMS
2925     // stays at bottom), followed by the region being used as a GC
2926     // alloc region (NTAMS will move to top() and the objects
2927     // originally below it will be grayed). All objects now marked in
2928     // the region are explicitly grayed, if below the global finger,
2929     // and in fact we do not need to scan anything else. So, we simply
2930     // set _finger to be limit to ensure that the bitmap iteration
2931     // doesn't do anything.
2932     _finger = limit;
2933   }
2934 
2935   _region_limit = limit;
2936 }
2937 
2938 void CMTask::giveup_current_region() {
2939   assert(_curr_region != NULL, "invariant");
2940   clear_region_fields();
2941 }
2942 
2943 void CMTask::clear_region_fields() {
2944   // Values for these three fields that indicate that we're not
2945   // holding on to a region.
2946   _curr_region   = NULL;
2947   _finger        = NULL;
2948   _region_limit  = NULL;
2949 }
2950 
2951 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2952   if (cm_oop_closure == NULL) {
2953     assert(_cm_oop_closure != NULL, "invariant");
2954   } else {
2955     assert(_cm_oop_closure == NULL, "invariant");
2956   }
2957   _cm_oop_closure = cm_oop_closure;
2958 }
2959 
2960 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2961   guarantee(nextMarkBitMap != NULL, "invariant");
2962   _nextMarkBitMap                = nextMarkBitMap;
2963   clear_region_fields();
2964 
2965   _calls                         = 0;
2966   _elapsed_time_ms               = 0.0;
2967   _termination_time_ms           = 0.0;
2968   _termination_start_time_ms     = 0.0;
2969 }
2970 
2971 bool CMTask::should_exit_termination() {
2972   regular_clock_call();
2973   // This is called when we are in the termination protocol. We should
2974   // quit if, for some reason, this task wants to abort or the global
2975   // stack is not empty (this means that we can get work from it).
2976   return !_cm->mark_stack_empty() || has_aborted();
2977 }
2978 
2979 void CMTask::reached_limit() {
2980   assert(_words_scanned >= _words_scanned_limit ||
2981          _refs_reached >= _refs_reached_limit ,
2982          "shouldn't have been called otherwise");
2983   regular_clock_call();
2984 }
2985 
2986 void CMTask::regular_clock_call() {
2987   if (has_aborted()) return;
2988 
2989   // First, we need to recalculate the words scanned and refs reached
2990   // limits for the next clock call.
2991   recalculate_limits();
2992 
2993   // During the regular clock call we do the following
2994 
2995   // (1) If an overflow has been flagged, then we abort.
2996   if (_cm->has_overflown()) {
2997     set_has_aborted();
2998     return;
2999   }
3000 
3001   // If we are not concurrent (i.e. we're doing remark) we don't need
3002   // to check anything else. The other steps are only needed during
3003   // the concurrent marking phase.
3004   if (!concurrent()) return;
3005 
3006   // (2) If marking has been aborted for Full GC, then we also abort.
3007   if (_cm->has_aborted()) {
3008     set_has_aborted();
3009     return;
3010   }
3011 
3012   double curr_time_ms = os::elapsedVTime() * 1000.0;
3013 
3014   // (3) We check whether we should yield. If we have to, then we abort.
3015   if (SuspendibleThreadSet::should_yield()) {
3016     // We should yield. To do this we abort the task. The caller is
3017     // responsible for yielding.
3018     set_has_aborted();
3019     return;
3020   }
3021 
3022   // (4) We check whether we've reached our time quota. If we have,
3023   // then we abort.
3024   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3025   if (elapsed_time_ms > _time_target_ms) {
3026     set_has_aborted();
3027     _has_timed_out = true;
3028     return;
3029   }
3030 
3031   // (5) Finally, we check whether there are enough completed SATB
3032   // buffers available for processing. If there are, we abort.
3033   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3034   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3035     // We do need to process SATB buffers; we'll abort and restart
3036     // the marking task to do so
3037     set_has_aborted();
3038     return;
3039   }
3040 }
3041 
3042 void CMTask::recalculate_limits() {
3043   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3044   _words_scanned_limit      = _real_words_scanned_limit;
3045 
3046   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3047   _refs_reached_limit       = _real_refs_reached_limit;
3048 }
3049 
3050 void CMTask::decrease_limits() {
3051   // This is called when we believe that we're going to do an infrequent
3052   // operation which will increase the per-byte scanned cost (i.e. move
3053   // entries to/from the global stack). It basically tries to decrease the
3054   // scanning limit so that the clock is called earlier.
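       // For example, if the scanned-words period is P, the effective limit
       // drops to _real_words_scanned_limit - 3P/4, so the clock call fires
       // roughly 3P/4 words earlier than it otherwise would (and similarly
       // for the refs-reached limit).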
3055 
3056   _words_scanned_limit = _real_words_scanned_limit -
3057     3 * words_scanned_period / 4;
3058   _refs_reached_limit  = _real_refs_reached_limit -
3059     3 * refs_reached_period / 4;
3060 }
3061 
3062 void CMTask::move_entries_to_global_stack() {
3063   // local array where we'll store the entries that will be popped
3064   // from the local queue
3065   oop buffer[global_stack_transfer_size];
3066 
3067   int n = 0;
3068   oop obj;
3069   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3070     buffer[n] = obj;
3071     ++n;
3072   }
3073 
3074   if (n > 0) {
3075     // we popped at least one entry from the local queue
3076 
3077     if (!_cm->mark_stack_push(buffer, n)) {
3078       set_has_aborted();
3079     }
3080   }
3081 
3082   // this operation was quite expensive, so decrease the limits
3083   decrease_limits();
3084 }
3085 
3086 void CMTask::get_entries_from_global_stack() {
3087   // local array where we'll store the entries that will be popped
3088   // from the global stack.
3089   oop buffer[global_stack_transfer_size];
3090   int n;
3091   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3092   assert(n <= global_stack_transfer_size,
3093          "we should not pop more than the given limit");
3094   if (n > 0) {
3095     // yes, we did actually pop at least one entry
3096     for (int i = 0; i < n; ++i) {
3097       bool success = _task_queue->push(buffer[i]);
3098       // We only call this when the local queue is empty or under a
3099       // given target limit. So, we do not expect this push to fail.
3100       assert(success, "invariant");
3101     }
3102   }
3103 
3104   // this operation was quite expensive, so decrease the limits
3105   decrease_limits();
3106 }
3107 
3108 void CMTask::drain_local_queue(bool partially) {
3109   if (has_aborted()) return;
3110 
3111   // Decide what the target size is, depending on whether we're going to
3112   // drain it partially (so that other tasks can steal if they run out
3113   // of things to do) or totally (at the very end).
3114   size_t target_size;
3115   if (partially) {
3116     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3117   } else {
3118     target_size = 0;
3119   }
3120 
3121   if (_task_queue->size() > target_size) {
3122     oop obj;
3123     bool ret = _task_queue->pop_local(obj);
3124     while (ret) {
3125       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3126       assert(!_g1h->is_on_master_free_list(
3127                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3128 
3129       scan_object(obj);
3130 
3131       if (_task_queue->size() <= target_size || has_aborted()) {
3132         ret = false;
3133       } else {
3134         ret = _task_queue->pop_local(obj);
3135       }
3136     }
3137   }
3138 }
3139 
3140 void CMTask::drain_global_stack(bool partially) {
3141   if (has_aborted()) return;
3142 
3143   // We have a policy to drain the local queue before we attempt to
3144   // drain the global stack.
3145   assert(partially || _task_queue->size() == 0, "invariant");
3146 
3147   // Decide what the target size is, depending on whether we're going to
3148   // drain it partially (so that other tasks can steal if they run out
3149   // of things to do) or totally (at the very end).  Notice that,
3150   // because we move entries from the global stack in chunks or
3151   // because another task might be doing the same, we might in fact
3152   // drop below the target. But, this is not a problem.
3153   size_t target_size;
3154   if (partially) {
3155     target_size = _cm->partial_mark_stack_size_target();
3156   } else {
3157     target_size = 0;
3158   }
3159 
3160   if (_cm->mark_stack_size() > target_size) {
3161     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3162       get_entries_from_global_stack();
3163       drain_local_queue(partially);
3164     }
3165   }
3166 }
3167 
3168 // The SATB queue code makes several assumptions about whether to call
3169 // the par or non-par versions of its methods, which is why some of this
3170 // code is replicated. We should really get rid of the single-threaded
3171 // version of the code to simplify things.
3172 void CMTask::drain_satb_buffers() {
3173   if (has_aborted()) return;
3174 
3175   // We set this so that the regular clock knows that we're in the
3176   // middle of draining buffers and doesn't set the abort flag when it
3177   // notices that SATB buffers are available for draining. It'd be
3178   // very counterproductive if it did that. :-)
3179   _draining_satb_buffers = true;
3180 
3181   CMSATBBufferClosure satb_cl(this, _g1h);
3182   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3183 
3184   // This keeps claiming and applying the closure to completed buffers
3185   // until we run out of buffers or we need to abort.
3186   while (!has_aborted() &&
3187          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3188     regular_clock_call();
3189   }
3190 
3191   _draining_satb_buffers = false;
3192 
3193   assert(has_aborted() ||
3194          concurrent() ||
3195          satb_mq_set.completed_buffers_num() == 0, "invariant");
3196 
3197   // Again, this was a potentially expensive operation; decrease the
3198   // limits to get the regular clock call early.
3199   decrease_limits();
3200 }
3201 
3202 void CMTask::print_stats() {
3203   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3204                          _worker_id, _calls);
3205   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3206                          _elapsed_time_ms, _termination_time_ms);
3207   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3208                          _step_times_ms.num(), _step_times_ms.avg(),
3209                          _step_times_ms.sd());
3210   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3211                          _step_times_ms.maximum(), _step_times_ms.sum());
3212 }
3213 
3214 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3215   return _task_queues->steal(worker_id, hash_seed, obj);
3216 }
3217 
3218 /*****************************************************************************
3219 
3220     The do_marking_step(time_target_ms, ...) method is the building
3221     block of the parallel marking framework. It can be called in parallel
3222     with other invocations of do_marking_step() on different tasks
3223     (but only one per task, obviously) and concurrently with the
3224     mutator threads, or during remark, hence it eliminates the need
3225     for two versions of the code. When called during remark, it will
3226     pick up from where the task left off during the concurrent marking
3227     phase. Interestingly, tasks are also claimable during evacuation
3228     pauses, since do_marking_step() ensures that it aborts before
3229     it needs to yield.
3230 
3231     The data structures that it uses to do marking work are the
3232     following:
3233 
3234       (1) Marking Bitmap. If there are gray objects that appear only
3235       on the bitmap (this happens either when dealing with an overflow
3236       or when the initial marking phase has simply marked the roots
3237       and didn't push them on the stack), then tasks claim heap
3238       regions whose bitmap they then scan to find gray objects. A
3239       global finger indicates where the end of the last claimed region
3240       is. A local finger indicates how far into the region a task has
3241       scanned. The two fingers are used to determine how to gray an
3242       object (i.e. whether simply marking it is OK, as it will be
3243       visited by a task in the future, or whether it also needs to
3244       be pushed on a stack).
3245 
3246       (2) Local Queue. The local queue of the task which is accessed
3247       reasonably efficiently by the task. Other tasks can steal from
3248       it when they run out of work. Throughout the marking phase, a
3249       task attempts to keep its local queue short but not totally
3250       empty, so that entries are available for stealing by other
3251       tasks. Only when there is no more work will a task totally
3252       drain its local queue.
3253 
3254       (3) Global Mark Stack. This handles local queue overflow. During
3255       marking, only sets of entries are moved between it and the local
3256       queues, as access to it requires a mutex and finer-grained
3257       interaction with it, which might cause contention. If it
3258       overflows, then the marking phase should restart and iterate
3259       over the bitmap to identify gray objects. Throughout the marking
3260       phase, tasks attempt to keep the global mark stack at a small
3261       length but not totally empty, so that entries are available for
3262       popping by other tasks. Only when there is no more work will
3263       tasks totally drain the global mark stack.
3264 
3265       (4) SATB Buffer Queue. This is where completed SATB buffers are
3266       made available. Buffers are regularly removed from this queue
3267       and scanned for roots, so that the queue doesn't get too
3268       long. During remark, all completed buffers are processed, as
3269       well as the filled in parts of any uncompleted buffers.
3270 
3271     The do_marking_step() method tries to abort when the time target
3272     has been reached. There are a few other cases when the
3273     do_marking_step() method also aborts:
3274 
3275       (1) When the marking phase has been aborted (after a Full GC).
3276 
3277       (2) When a global overflow (on the global stack) has been
3278       triggered. Before the task aborts, it will actually sync up with
3279       the other tasks to ensure that all the marking data structures
3280       (local queues, stacks, fingers etc.)  are re-initialized so that
3281       when do_marking_step() completes, the marking phase can
3282       immediately restart.
3283 
3284       (3) When enough completed SATB buffers are available. The
3285       do_marking_step() method only tries to drain SATB buffers right
3286       at the beginning. So, if enough buffers are available, the
3287       marking step aborts and the SATB buffers are processed at
3288       the beginning of the next invocation.
3289 
3290       (4) To yield. When we have to yield, we abort and yield
3291       right at the end of do_marking_step(). This saves us a lot
3292       of hassle as, by yielding, we might allow a Full GC. If this
3293       happens, then objects will be compacted underneath our feet,
3294       the heap might shrink, etc. We avoid checking for this by just
3295       aborting and doing the yield right at the end.
3296 
3297     From the above it follows that the do_marking_step() method should
3298     be called in a loop (or, otherwise, regularly) until it completes.
3299 
3300     If a marking step completes without its has_aborted() flag being
3301     true, it means it has completed the current marking phase (and
3302     also all other marking tasks have done so and have all synced up).
3303 
3304     A method called regular_clock_call() is invoked "regularly" (in
3305     sub ms intervals) throughout marking. It is this clock method that
3306     checks all the abort conditions which were mentioned above and
3307     decides when the task should abort. A work-based scheme is used to
3308     trigger this clock method: when the number of object words the
3309     marking phase has scanned or the number of references the marking
3310     phase has visited reaches a given limit. Additional invocations of
3311     the clock method have been planted in a few other strategic places
3312     too. The initial reason for the clock method was to avoid calling
3313     vtime too frequently, as it is quite expensive. So, once it was in
3314     place, it was natural to piggy-back all the other conditions on it
3315     too, rather than constantly checking them throughout the code.
3316 
3317     If do_termination is true then do_marking_step will enter its
3318     termination protocol.
3319 
3320     The value of is_serial must be true when do_marking_step is being
3321     called serially (i.e. by the VMThread) and do_marking_step should
3322     skip any synchronization in the termination and overflow code.
3323     Examples include the serial remark code and the serial reference
3324     processing closures.
3325 
3326     The value of is_serial must be false when do_marking_step is
3327     being called by any of the worker threads in a work gang.
3328     Examples include the concurrent marking code (CMMarkingTask),
3329     the MT remark code, and the MT reference processing closures.
3330 
3331  *****************************************************************************/
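     // The following is an illustrative sketch (not taken from the actual
     // callers) of the "call do_marking_step() in a loop until it completes"
     // pattern described above. The time target and the reaction to an
     // aborted step are hypothetical; real callers, such as the concurrent
     // marking task, the remark task and the reference processing closures,
     // also coordinate yielding and overflow handling with ConcurrentMark.
     //
     //   CMTask* task = ...;  // the task this worker has claimed
     //   do {
     //     task->do_marking_step(10.0,  /* hypothetical time target in ms */
     //                           true,  /* do_termination */
     //                           false  /* is_serial: worker in a work gang */);
     //     // If the step aborted (overflow, yield request, time-out, or
     //     // pending SATB buffers), yield / sync up as appropriate and
     //     // simply call do_marking_step() again.
     //   } while (task->has_aborted());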
3332 
3333 void CMTask::do_marking_step(double time_target_ms,
3334                              bool do_termination,
3335                              bool is_serial) {
3336   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3337   assert(concurrent() == _cm->concurrent(), "they should be the same");
3338 
3339   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3340   assert(_task_queues != NULL, "invariant");
3341   assert(_task_queue != NULL, "invariant");
3342   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3343 
3344   assert(!_claimed,
3345          "only one thread should claim this task at any one time");
3346 
3347   // OK, this doesn't safeguard against all possible scenarios, as it is
3348   // possible for two threads to set the _claimed flag at the same
3349   // time. But it is only for debugging purposes anyway and it will
3350   // catch most problems.
3351   _claimed = true;
3352 
3353   _start_time_ms = os::elapsedVTime() * 1000.0;
3354 
3355   // If do_stealing is true then do_marking_step will attempt to
3356   // steal work from the other CMTasks. It only makes sense to
3357   // enable stealing when the termination protocol is enabled
3358   // and do_marking_step() is not being called serially.
3359   bool do_stealing = do_termination && !is_serial;
3360 
3361   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3362   _time_target_ms = time_target_ms - diff_prediction_ms;
3363 
3364   // set up the variables that are used in the work-based scheme to
3365   // call the regular clock method
3366   _words_scanned = 0;
3367   _refs_reached  = 0;
3368   recalculate_limits();
3369 
3370   // clear all flags
3371   clear_has_aborted();
3372   _has_timed_out = false;
3373   _draining_satb_buffers = false;
3374 
3375   ++_calls;
3376 
3377   // Set up the bitmap and oop closures. Anything that uses them is
3378   // eventually called from this method, so it is OK to allocate them
3379   // here, on this frame's stack.
3380   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3381   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3382   set_cm_oop_closure(&cm_oop_closure);
3383 
3384   if (_cm->has_overflown()) {
3385     // This can happen if the mark stack overflows during a GC pause
3386     // and this task, after a yield point, restarts. We have to abort
3387     // as we need to get into the overflow protocol which happens
3388     // right at the end of this task.
3389     set_has_aborted();
3390   }
3391 
3392   // First drain any available SATB buffers. After this, we will not
3393   // look at SATB buffers before the next invocation of this method.
3394   // If enough completed SATB buffers are queued up, the regular clock
3395   // will abort this task so that it restarts.
3396   drain_satb_buffers();
3397   // ...then partially drain the local queue and the global stack
3398   drain_local_queue(true);
3399   drain_global_stack(true);
3400 
3401   do {
3402     if (!has_aborted() && _curr_region != NULL) {
3403       // This means that we're already holding on to a region.
3404       assert(_finger != NULL, "if region is not NULL, then the finger "
3405              "should not be NULL either");
3406 
3407       // We might have restarted this task after an evacuation pause
3408       // which might have evacuated the region we're holding on to
3409       // underneath our feet. Let's read its limit again to make sure
3410       // that we do not iterate over a region of the heap that
3411       // contains garbage (update_region_limit() will also move
3412       // _finger to the start of the region if it is found empty).
3413       update_region_limit();
3414       // We will start from _finger not from the start of the region,
3415       // as we might be restarting this task after aborting half-way
3416       // through scanning this region. In this case, _finger points to
3417       // the address where we last found a marked object. If this is a
3418       // fresh region, _finger points to start().
3419       MemRegion mr = MemRegion(_finger, _region_limit);
3420 
3421       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3422              "humongous regions should go around loop once only");
3423 
3424       // Some special cases:
3425       // If the memory region is empty, we can just give up the region.
3426       // If the current region is humongous then we only need to check
3427       // the bitmap for the bit associated with the start of the object,
3428       // scan the object if it's live, and give up the region.
3429       // Otherwise, let's iterate over the bitmap of the part of the region
3430       // that is left.
3431       // If the iteration is successful, give up the region.
3432       if (mr.is_empty()) {
3433         giveup_current_region();
3434         regular_clock_call();
3435       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3436         if (_nextMarkBitMap->isMarked(mr.start())) {
3437           // The object is marked - apply the closure
3438           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3439           bitmap_closure.do_bit(offset);
3440         }
3441         // Even if this task aborted while scanning the humongous object
3442         // we can (and should) give up the current region.
3443         giveup_current_region();
3444         regular_clock_call();
3445       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3446         giveup_current_region();
3447         regular_clock_call();
3448       } else {
3449         assert(has_aborted(), "currently the only way to do so");
3450         // The only way to abort the bitmap iteration is to return
3451         // false from the do_bit() method. However, inside the
3452         // do_bit() method we move the _finger to point to the
3453         // object currently being looked at. So, if we bail out, we
3454         // have definitely set _finger to something non-null.
3455         assert(_finger != NULL, "invariant");
3456 
3457         // Region iteration was actually aborted. So now _finger
3458         // points to the address of the object we last scanned. If we
3459         // leave it there, when we restart this task, we will rescan
3460         // the object. It is easy to avoid this. We move the finger by
3461         // enough to point to the next possible object header (the
3462         // bitmap knows by how much we need to move it as it knows its
3463         // granularity).
3464         assert(_finger < _region_limit, "invariant");
3465         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3466         // Check if bitmap iteration was aborted while scanning the last object
3467         if (new_finger >= _region_limit) {
3468           giveup_current_region();
3469         } else {
3470           move_finger_to(new_finger);
3471         }
3472       }
3473     }
3474     // At this point we have either completed iterating over the
3475     // region we were holding on to, or we have aborted.
3476 
3477     // We then partially drain the local queue and the global stack.
3478     // (Do we really need this?)
3479     drain_local_queue(true);
3480     drain_global_stack(true);
3481 
3482     // Read the note on the claim_region() method on why it might
3483     // return NULL with potentially more regions available for
3484     // claiming and why we have to check out_of_regions() to determine
3485     // whether we're done or not.
3486     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3487       // We are going to try to claim a new region. We should have
3488       // given up on the previous one.
3489       // Separated the asserts so that we know which one fires.
3490       assert(_curr_region  == NULL, "invariant");
3491       assert(_finger       == NULL, "invariant");
3492       assert(_region_limit == NULL, "invariant");
3493       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3494       if (claimed_region != NULL) {
3495         // Yes, we managed to claim one
3496         setup_for_region(claimed_region);
3497         assert(_curr_region == claimed_region, "invariant");
3498       }
3499       // It is important to call the regular clock here. It might take
3500       // a while to claim a region if, for example, we hit a large
3501       // block of empty regions. So we need to call the regular clock
3502       // method once round the loop to make sure it's called
3503       // frequently enough.
3504       regular_clock_call();
3505     }
3506 
3507     if (!has_aborted() && _curr_region == NULL) {
3508       assert(_cm->out_of_regions(),
3509              "at this point we should be out of regions");
3510     }
3511   } while ( _curr_region != NULL && !has_aborted());
3512 
3513   if (!has_aborted()) {
3514     // We cannot check whether the global stack is empty, since other
3515     // tasks might be pushing objects to it concurrently.
3516     assert(_cm->out_of_regions(),
3517            "at this point we should be out of regions");
3518     // Try to reduce the number of available SATB buffers so that
3519     // remark has less work to do.
3520     drain_satb_buffers();
3521   }
3522 
3523   // Since we've done everything else, we can now totally drain the
3524   // local queue and global stack.
3525   drain_local_queue(false);
3526   drain_global_stack(false);
3527 
3528   // Attempt at work stealing from other tasks' queues.
3529   if (do_stealing && !has_aborted()) {
3530     // We have not aborted. This means that we have finished all that
3531     // we could. Let's try to do some stealing...
3532 
3533     // We cannot check whether the global stack is empty, since other
3534     // tasks might be pushing objects to it concurrently.
3535     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3536            "only way to reach here");
3537     while (!has_aborted()) {
3538       oop obj;
3539       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3540         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3541                "any stolen object should be marked");
3542         scan_object(obj);
3543 
3544         // And since we're towards the end, let's totally drain the
3545         // local queue and global stack.
3546         drain_local_queue(false);
3547         drain_global_stack(false);
3548       } else {
3549         break;
3550       }
3551     }
3552   }
3553 
3554   // If we are about to wrap up and go into termination, check if we
3555   // should raise the overflow flag.
3556   if (do_termination && !has_aborted()) {
3557     if (_cm->force_overflow()->should_force()) {
3558       _cm->set_has_overflown();
3559       regular_clock_call();
3560     }
3561   }
3562 
3563   // We still haven't aborted. Now, let's try to get into the
3564   // termination protocol.
3565   if (do_termination && !has_aborted()) {
3566     // We cannot check whether the global stack is empty, since other
3567     // tasks might be concurrently pushing objects on it.
3568     // Separated the asserts so that we know which one fires.
3569     assert(_cm->out_of_regions(), "only way to reach here");
3570     assert(_task_queue->size() == 0, "only way to reach here");
3571     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3572 
3573     // The CMTask class also extends the TerminatorTerminator class,
3574     // hence its should_exit_termination() method will also decide
3575     // whether to exit the termination protocol or not.
3576     bool finished = (is_serial ||
3577                      _cm->terminator()->offer_termination(this));
3578     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3579     _termination_time_ms +=
3580       termination_end_time_ms - _termination_start_time_ms;
3581 
3582     if (finished) {
3583       // We're all done.
3584 
3585       if (_worker_id == 0) {
3586         // let's allow task 0 to do this
3587         if (concurrent()) {
3588           assert(_cm->concurrent_marking_in_progress(), "invariant");
3589           // we need to set this to false before the next
3590           // safepoint. This way we ensure that the marking phase
3591           // doesn't observe any more heap expansions.
3592           _cm->clear_concurrent_marking_in_progress();
3593         }
3594       }
3595 
3596       // We can now guarantee that the global stack is empty, since
3597       // all other tasks have finished. We separated the guarantees so
3598       // that, if a condition is false, we can immediately find out
3599       // which one.
3600       guarantee(_cm->out_of_regions(), "only way to reach here");
3601       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3602       guarantee(_task_queue->size() == 0, "only way to reach here");
3603       guarantee(!_cm->has_overflown(), "only way to reach here");
3604       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3605     } else {
3606       // Apparently there's more work to do. Let's abort this task. The
3607       // caller will restart it and we can hopefully find more things to do.
3608       set_has_aborted();
3609     }
3610   }
3611 
3612   // Mainly for debugging purposes, to make sure that a pointer to the
3613   // closure, which was allocated on this frame's stack, doesn't escape
3614   // it by accident.
3615   set_cm_oop_closure(NULL);
3616   double end_time_ms = os::elapsedVTime() * 1000.0;
3617   double elapsed_time_ms = end_time_ms - _start_time_ms;
3618   // Update the step history.
3619   _step_times_ms.add(elapsed_time_ms);
3620 
3621   if (has_aborted()) {
3622     // The task was aborted for some reason.
3623     if (_has_timed_out) {
3624       double diff_ms = elapsed_time_ms - _time_target_ms;
3625       // Keep statistics of how well we did with respect to hitting
3626       // our target only if we actually timed out (if we aborted for
3627       // other reasons, then the results might get skewed).
3628       _marking_step_diffs_ms.add(diff_ms);
3629     }
3630 
3631     if (_cm->has_overflown()) {
3632       // This is the interesting one. We aborted because a global
3633       // overflow was raised. This means we have to restart the
3634       // marking phase and start iterating over regions. However, in
3635       // order to do this we have to make sure that all tasks stop
3636       // what they are doing and re-initialize in a safe manner. We
3637       // will achieve this with the use of two barrier sync points.
3638 
3639       if (!is_serial) {
3640         // We only need to enter the sync barrier if being called
3641         // from a parallel context
3642         _cm->enter_first_sync_barrier(_worker_id);
3643 
3644         // When we exit this sync barrier we know that all tasks have
3645         // stopped doing marking work. So, it's now safe to
3646         // re-initialize our data structures. At the end of this method,
3647         // task 0 will clear the global data structures.
3648       }
3649 
3650       // We clear the local state of this task...
3651       clear_region_fields();
3652 
3653       if (!is_serial) {
3654         // ...and enter the second barrier.
3655         _cm->enter_second_sync_barrier(_worker_id);
3656       }
3657       // At this point, if we're during the concurrent phase of
3658       // marking, everything has been re-initialized and we're
3659       // ready to restart.
3660     }
3661   }
3662 
3663   _claimed = false;
3664 }
3665 
3666 CMTask::CMTask(uint worker_id,
3667                ConcurrentMark* cm,
3668                size_t* marked_bytes,
3669                BitMap* card_bm,
3670                CMTaskQueue* task_queue,
3671                CMTaskQueueSet* task_queues)
3672   : _g1h(G1CollectedHeap::heap()),
3673     _worker_id(worker_id), _cm(cm),
3674     _claimed(false),
3675     _nextMarkBitMap(NULL), _hash_seed(17),
3676     _task_queue(task_queue),
3677     _task_queues(task_queues),
3678     _cm_oop_closure(NULL),
3679     _marked_bytes_array(marked_bytes),
3680     _card_bm(card_bm) {
3681   guarantee(task_queue != NULL, "invariant");
3682   guarantee(task_queues != NULL, "invariant");
3683 
3684   _marking_step_diffs_ms.add(0.5);
3685 }
3686 
3687 // These are formatting macros that are used below to ensure
3688 // consistent formatting. The *_H_* versions are used to format the
3689 // header for a particular value and they should be kept consistent
3690 // with the corresponding macro. Also note that most of the macros add
3691 // the necessary white space (as a prefix) which makes them a bit
3692 // easier to compose.
3693 
3694 // All the output lines are prefixed with this string to be able to
3695 // identify them easily in a large log file.
3696 #define G1PPRL_LINE_PREFIX            "###"
3697 
3698 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3699 #ifdef _LP64
3700 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3701 #else // _LP64
3702 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3703 #endif // _LP64
3704 
3705 // For per-region info
3706 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3707 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3708 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3709 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3710 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3711 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3712 
3713 // For summary info
3714 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3715 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3716 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3717 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
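     // For example (see the constructor and doHeapRegion() below), a header
     // line is printed by composing
     //   G1PPRL_LINE_PREFIX G1PPRL_TYPE_H_FORMAT G1PPRL_ADDR_BASE_H_FORMAT ...
     // into one format string, and each per-region row uses the matching
     // non-header variants so that the columns line up.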
3718 
3719 G1PrintRegionLivenessInfoClosure::
3720 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
3721   : _out(out),
3722     _total_used_bytes(0), _total_capacity_bytes(0),
3723     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3724     _hum_used_bytes(0), _hum_capacity_bytes(0),
3725     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3726     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3727   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3728   MemRegion g1_reserved = g1h->g1_reserved();
3729   double now = os::elapsedTime();
3730 
3731   // Print the header of the output.
3732   _out->cr();
3733   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3734   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
3735                  G1PPRL_SUM_ADDR_FORMAT("reserved")
3736                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
3737                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3738                  HeapRegion::GrainBytes);
3739   _out->print_cr(G1PPRL_LINE_PREFIX);
3740   _out->print_cr(G1PPRL_LINE_PREFIX
3741                 G1PPRL_TYPE_H_FORMAT
3742                 G1PPRL_ADDR_BASE_H_FORMAT
3743                 G1PPRL_BYTE_H_FORMAT
3744                 G1PPRL_BYTE_H_FORMAT
3745                 G1PPRL_BYTE_H_FORMAT
3746                 G1PPRL_DOUBLE_H_FORMAT
3747                 G1PPRL_BYTE_H_FORMAT
3748                 G1PPRL_BYTE_H_FORMAT,
3749                 "type", "address-range",
3750                 "used", "prev-live", "next-live", "gc-eff",
3751                 "remset", "code-roots");
3752   _out->print_cr(G1PPRL_LINE_PREFIX
3753                 G1PPRL_TYPE_H_FORMAT
3754                 G1PPRL_ADDR_BASE_H_FORMAT
3755                 G1PPRL_BYTE_H_FORMAT
3756                 G1PPRL_BYTE_H_FORMAT
3757                 G1PPRL_BYTE_H_FORMAT
3758                 G1PPRL_DOUBLE_H_FORMAT
3759                 G1PPRL_BYTE_H_FORMAT
3760                 G1PPRL_BYTE_H_FORMAT,
3761                 "", "",
3762                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3763                 "(bytes)", "(bytes)");
3764 }
3765 
3766 // It takes as a parameter a pointer to one of the _hum_* fields; it
3767 // deduces the corresponding value for a region in a humongous region
3768 // series (either the region size, or what's left if the _hum_* field
3769 // is < the region size), and updates the _hum_* field accordingly.
3770 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3771   size_t bytes = 0;
3772   // The > 0 check is to deal with the prev and next live bytes which
3773   // could be 0.
3774   if (*hum_bytes > 0) {
3775     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3776     *hum_bytes -= bytes;
3777   }
3778   return bytes;
3779 }
3780 
3781 // It deduces the values for a region in a humongous region series
3782 // from the _hum_* fields and updates those accordingly. It assumes
3783 // that that _hum_* fields have already been set up from the "starts
3784 // that the _hum_* fields have already been set up from the "starts
3785 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3786                                                      size_t* capacity_bytes,
3787                                                      size_t* prev_live_bytes,
3788                                                      size_t* next_live_bytes) {
3789   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3790   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3791   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3792   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3793   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3794 }
3795 
3796 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3797   const char* type       = r->get_type_str();
3798   HeapWord* bottom       = r->bottom();
3799   HeapWord* end          = r->end();
3800   size_t capacity_bytes  = r->capacity();
3801   size_t used_bytes      = r->used();
3802   size_t prev_live_bytes = r->live_bytes();
3803   size_t next_live_bytes = r->next_live_bytes();
3804   double gc_eff          = r->gc_efficiency();
3805   size_t remset_bytes    = r->rem_set()->mem_size();
3806   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3807 
3808   if (r->is_starts_humongous()) {
3809     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3810            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3811            "they should have been zeroed after the last time we used them");
3812     // Set up the _hum_* fields.
3813     _hum_capacity_bytes  = capacity_bytes;
3814     _hum_used_bytes      = used_bytes;
3815     _hum_prev_live_bytes = prev_live_bytes;
3816     _hum_next_live_bytes = next_live_bytes;
3817     get_hum_bytes(&used_bytes, &capacity_bytes,
3818                   &prev_live_bytes, &next_live_bytes);
3819     end = bottom + HeapRegion::GrainWords;
3820   } else if (r->is_continues_humongous()) {
3821     get_hum_bytes(&used_bytes, &capacity_bytes,
3822                   &prev_live_bytes, &next_live_bytes);
3823     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3824   }
3825 
3826   _total_used_bytes      += used_bytes;
3827   _total_capacity_bytes  += capacity_bytes;
3828   _total_prev_live_bytes += prev_live_bytes;
3829   _total_next_live_bytes += next_live_bytes;
3830   _total_remset_bytes    += remset_bytes;
3831   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3832 
3833   // Print a line for this particular region.
3834   _out->print_cr(G1PPRL_LINE_PREFIX
3835                  G1PPRL_TYPE_FORMAT
3836                  G1PPRL_ADDR_BASE_FORMAT
3837                  G1PPRL_BYTE_FORMAT
3838                  G1PPRL_BYTE_FORMAT
3839                  G1PPRL_BYTE_FORMAT
3840                  G1PPRL_DOUBLE_FORMAT
3841                  G1PPRL_BYTE_FORMAT
3842                  G1PPRL_BYTE_FORMAT,
3843                  type, p2i(bottom), p2i(end),
3844                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3845                  remset_bytes, strong_code_roots_bytes);
3846 
3847   return false;
3848 }
3849 
3850 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3851   // add static memory usages to remembered set sizes
3852   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3853   // Print the footer of the output.
3854   _out->print_cr(G1PPRL_LINE_PREFIX);
3855   _out->print_cr(G1PPRL_LINE_PREFIX
3856                  " SUMMARY"
3857                  G1PPRL_SUM_MB_FORMAT("capacity")
3858                  G1PPRL_SUM_MB_PERC_FORMAT("used")
3859                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3860                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3861                  G1PPRL_SUM_MB_FORMAT("remset")
3862                  G1PPRL_SUM_MB_FORMAT("code-roots"),
3863                  bytes_to_mb(_total_capacity_bytes),
3864                  bytes_to_mb(_total_used_bytes),
3865                  perc(_total_used_bytes, _total_capacity_bytes),
3866                  bytes_to_mb(_total_prev_live_bytes),
3867                  perc(_total_prev_live_bytes, _total_capacity_bytes),
3868                  bytes_to_mb(_total_next_live_bytes),
3869                  perc(_total_next_live_bytes, _total_capacity_bytes),
3870                  bytes_to_mb(_total_remset_bytes),
3871                  bytes_to_mb(_total_strong_code_roots_bytes));
3872   _out->cr();
3873 }