1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  30 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  33 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  34 #include "gc_implementation/g1/g1Log.hpp"
  35 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  36 #include "gc_implementation/g1/g1RemSet.hpp"
  37 #include "gc_implementation/g1/g1StringDedup.hpp"
  38 #include "gc_implementation/g1/heapRegion.inline.hpp"
  39 #include "gc_implementation/g1/heapRegionManager.inline.hpp"
  40 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  41 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
  42 #include "gc_implementation/shared/vmGCOperations.hpp"
  43 #include "gc_implementation/shared/gcTimer.hpp"
  44 #include "gc_implementation/shared/gcTrace.hpp"
  45 #include "gc_implementation/shared/gcTraceTime.hpp"
  46 #include "memory/allocation.hpp"
  47 #include "memory/genOopClosures.inline.hpp"
  48 #include "memory/referencePolicy.hpp"
  49 #include "memory/resourceArea.hpp"
  50 #include "memory/strongRootsScope.hpp"
  51 #include "oops/oop.inline.hpp"
  52 #include "runtime/handles.inline.hpp"
  53 #include "runtime/java.hpp"
  54 #include "runtime/atomic.inline.hpp"
  55 #include "runtime/prefetch.inline.hpp"
  56 #include "services/memTracker.hpp"
  57 
  58 // Concurrent marking bit map wrapper
  59 
  60 CMBitMapRO::CMBitMapRO(int shifter) :
  61   _bm(),
  62   _shifter(shifter) {
  63   _bmStartWord = 0;
  64   _bmWordSize = 0;
  65 }
  66 
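// Returns the address of the first marked word at or after addr (rounded up
// to a possible object boundary) and below limit; returns limit if no marked
// word is found. A NULL limit means the end of the covered heap.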
  67 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  68                                                const HeapWord* limit) const {
  69   // First we must round addr *up* to a possible object boundary.
  70   addr = (HeapWord*)align_size_up((intptr_t)addr,
  71                                   HeapWordSize << _shifter);
  72   size_t addrOffset = heapWordToOffset(addr);
  73   if (limit == NULL) {
  74     limit = _bmStartWord + _bmWordSize;
  75   }
  76   size_t limitOffset = heapWordToOffset(limit);
  77   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  78   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  79   assert(nextAddr >= addr, "get_next_one postcondition");
  80   assert(nextAddr == limit || isMarked(nextAddr),
  81          "get_next_one postcondition");
  82   return nextAddr;
  83 }
  84 
  85 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  86                                                  const HeapWord* limit) const {
  87   size_t addrOffset = heapWordToOffset(addr);
  88   if (limit == NULL) {
  89     limit = _bmStartWord + _bmWordSize;
  90   }
  91   size_t limitOffset = heapWordToOffset(limit);
  92   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  93   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
   94   assert(nextAddr >= addr, "get_next_zero postcondition");
   95   assert(nextAddr == limit || !isMarked(nextAddr),
   96          "get_next_zero postcondition");
  97   return nextAddr;
  98 }
  99 
 100 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 101   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 102   return (int) (diff >> _shifter);
 103 }
 104 
 105 #ifndef PRODUCT
 106 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 107   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 108   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 109          "size inconsistency");
 110   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 111          _bmWordSize  == heap_rs.word_size();
 112 }
 113 #endif
 114 
 115 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 116   _bm.print_on_error(st, prefix);
 117 }
 118 
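// Size, in bytes, of the backing store needed for a mark bitmap covering
// heap_size bytes of heap: one bitmap bit covers one minimum-object-alignment
// unit, so one bitmap byte covers mark_distance() bytes of heap.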
 119 size_t CMBitMap::compute_size(size_t heap_size) {
 120   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 121 }
 122 
 123 size_t CMBitMap::mark_distance() {
 124   return MinObjAlignmentInBytes * BitsPerByte;
 125 }
 126 
 127 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 128   _bmStartWord = heap.start();
 129   _bmWordSize = heap.word_size();
 130 
 131   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 132   _bm.set_size(_bmWordSize >> _shifter);
 133 
 134   storage->set_mapping_changed_listener(&_listener);
 135 }
 136 
 137 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 138   if (zero_filled) {
 139     return;
 140   }
 141   // We need to clear the bitmap on commit, removing any existing information.
 142   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 143   _bm->clearRange(mr);
 144 }
 145 
 146 // Closure used for clearing the given mark bitmap.
 147 class ClearBitmapHRClosure : public HeapRegionClosure {
 148  private:
 149   ConcurrentMark* _cm;
 150   CMBitMap* _bitmap;
 151   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 152  public:
 153   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 154     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 155   }
 156 
 157   virtual bool doHeapRegion(HeapRegion* r) {
 158     size_t const chunk_size_in_words = M / HeapWordSize;
 159 
 160     HeapWord* cur = r->bottom();
 161     HeapWord* const end = r->end();
 162 
 163     while (cur < end) {
 164       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 165       _bitmap->clearRange(mr);
 166 
 167       cur += chunk_size_in_words;
 168 
 169       // Abort iteration if after yielding the marking has been aborted.
 170       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 171         return true;
 172       }
 173       // Repeat the asserts from before the start of the closure. We will do them
 174       // as asserts here to minimize their overhead on the product. However, we
 175       // will have them as guarantees at the beginning / end of the bitmap
 176       // clearing to get some checking in the product.
 177       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 178       assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
 179     }
 180 
 181     return false;
 182   }
 183 };
 184 
 185 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 186   ClearBitmapHRClosure* _cl;
 187   HeapRegionClaimer     _hrclaimer;
 188   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 189 
 190 public:
 191   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
  192       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 193 
 194   void work(uint worker_id) {
 195     if (_suspendible) {
 196       SuspendibleThreadSet::join();
 197     }
 198     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 199     if (_suspendible) {
 200       SuspendibleThreadSet::leave();
 201     }
 202   }
 203 };
 204 
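// Clears the entire bitmap using the heap's active worker gang. This variant
// neither yields nor joins the suspendible thread set, so the iteration
// always runs to completion.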
 205 void CMBitMap::clearAll() {
 206   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 207   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 208   uint n_workers = g1h->workers()->active_workers();
 209   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 210   g1h->workers()->run_task(&task);
 211   guarantee(cl.complete(), "Must have completed iteration.");
 212   return;
 213 }
 214 
 215 void CMBitMap::markRange(MemRegion mr) {
  216   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 217   assert(!mr.is_empty(), "unexpected empty region");
 218   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 219           ((HeapWord *) mr.end())),
 220          "markRange memory region end is not card aligned");
 221   // convert address range into offset range
 222   _bm.at_put_range(heapWordToOffset(mr.start()),
 223                    heapWordToOffset(mr.end()), true);
 224 }
 225 
 226 void CMBitMap::clearRange(MemRegion mr) {
  227   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 228   assert(!mr.is_empty(), "unexpected empty region");
 229   // convert address range into offset range
 230   _bm.at_put_range(heapWordToOffset(mr.start()),
 231                    heapWordToOffset(mr.end()), false);
 232 }
 233 
 234 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 235                                             HeapWord* end_addr) {
 236   HeapWord* start = getNextMarkedWordAddress(addr);
 237   start = MIN2(start, end_addr);
 238   HeapWord* end   = getNextUnmarkedWordAddress(start);
 239   end = MIN2(end, end_addr);
 240   assert(start <= end, "Consistency check");
 241   MemRegion mr(start, end);
 242   if (!mr.is_empty()) {
 243     clearRange(mr);
 244   }
 245   return mr;
 246 }
 247 
 248 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 249   _base(NULL), _cm(cm)
 250 #ifdef ASSERT
 251   , _drain_in_progress(false)
 252   , _drain_in_progress_yields(false)
 253 #endif
 254 {}
 255 
 256 bool CMMarkStack::allocate(size_t capacity) {
 257   // allocate a stack of the requisite depth
 258   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 259   if (!rs.is_reserved()) {
 260     warning("ConcurrentMark MarkStack allocation failure");
 261     return false;
 262   }
 263   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 264   if (!_virtual_space.initialize(rs, rs.size())) {
 265     warning("ConcurrentMark MarkStack backing store failure");
 266     // Release the virtual memory reserved for the marking stack
 267     rs.release();
 268     return false;
 269   }
 270   assert(_virtual_space.committed_size() == rs.size(),
 271          "Didn't reserve backing store for all of ConcurrentMark stack?");
 272   _base = (oop*) _virtual_space.low();
 273   setEmpty();
 274   _capacity = (jint) capacity;
 275   _saved_index = -1;
 276   _should_expand = false;
 277   NOT_PRODUCT(_max_depth = 0);
 278   return true;
 279 }
 280 
 281 void CMMarkStack::expand() {
  282   // Called during remark if we've overflowed the marking stack while marking.
  283   assert(isEmpty(), "stack should have been emptied while handling overflow");
 284   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 285   // Clear expansion flag
 286   _should_expand = false;
 287   if (_capacity == (jint) MarkStackSizeMax) {
 288     if (PrintGCDetails && Verbose) {
 289       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 290     }
 291     return;
 292   }
 293   // Double capacity if possible
 294   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  295   // Do not give up the existing stack until we have managed to
  296   // get the doubled capacity that we want.
 297   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 298                                                            sizeof(oop)));
 299   if (rs.is_reserved()) {
 300     // Release the backing store associated with old stack
 301     _virtual_space.release();
 302     // Reinitialize virtual space for new stack
 303     if (!_virtual_space.initialize(rs, rs.size())) {
 304       fatal("Not enough swap for expanded marking stack capacity");
 305     }
 306     _base = (oop*)(_virtual_space.low());
 307     _index = 0;
 308     _capacity = new_capacity;
 309   } else {
 310     if (PrintGCDetails && Verbose) {
  311       // Failed to double the capacity; continue with the current stack.
 312       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 313                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 314                           _capacity / K, new_capacity / K);
 315     }
 316   }
 317 }
 318 
 319 void CMMarkStack::set_should_expand() {
  320   // If we're resetting the marking state because of a
  321   // marking stack overflow, record that we should, if
 322   // possible, expand the stack.
 323   _should_expand = _cm->has_overflown();
 324 }
 325 
 326 CMMarkStack::~CMMarkStack() {
 327   if (_base != NULL) {
 328     _base = NULL;
 329     _virtual_space.release();
 330   }
 331 }
 332 
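// Lock-free push of a single entry: claim a slot by CAS-ing _index forward,
// retrying on contention. If the stack is full, set _overflow instead of
// blocking.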
 333 void CMMarkStack::par_push(oop ptr) {
 334   while (true) {
 335     if (isFull()) {
 336       _overflow = true;
 337       return;
 338     }
 339     // Otherwise...
 340     jint index = _index;
 341     jint next_index = index+1;
 342     jint res = Atomic::cmpxchg(next_index, &_index, index);
 343     if (res == index) {
 344       _base[index] = ptr;
 345       // Note that we don't maintain this atomically.  We could, but it
 346       // doesn't seem necessary.
 347       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 348       return;
 349     }
 350     // Otherwise, we need to try again.
 351   }
 352 }
 353 
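// Lock-free bulk push: claim n consecutive slots with a single CAS on _index,
// retrying on contention. Sets _overflow if the entries do not fit.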
 354 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 355   while (true) {
 356     if (isFull()) {
 357       _overflow = true;
 358       return;
 359     }
 360     // Otherwise...
 361     jint index = _index;
 362     jint next_index = index + n;
 363     if (next_index > _capacity) {
 364       _overflow = true;
 365       return;
 366     }
 367     jint res = Atomic::cmpxchg(next_index, &_index, index);
 368     if (res == index) {
 369       for (int i = 0; i < n; i++) {
 370         int  ind = index + i;
 371         assert(ind < _capacity, "By overflow test above.");
 372         _base[ind] = ptr_arr[i];
 373       }
 374       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 375       return;
 376     }
 377     // Otherwise, we need to try again.
 378   }
 379 }
 380 
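// Bulk push of n entries, serialized with par_pop_arr() via the
// ParGCRareEvent_lock rather than a CAS. Sets _overflow if the entries
// do not fit.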
 381 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 382   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 383   jint start = _index;
 384   jint next_index = start + n;
 385   if (next_index > _capacity) {
 386     _overflow = true;
 387     return;
 388   }
 389   // Otherwise.
 390   _index = next_index;
 391   for (int i = 0; i < n; i++) {
 392     int ind = start + i;
 393     assert(ind < _capacity, "By overflow test above.");
 394     _base[ind] = ptr_arr[i];
 395   }
 396   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 397 }
 398 
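// Pops up to max entries into ptr_arr while holding ParGCRareEvent_lock.
// Stores the number of entries actually popped in *n and returns false if
// the stack was empty.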
 399 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 400   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 401   jint index = _index;
 402   if (index == 0) {
 403     *n = 0;
 404     return false;
 405   } else {
 406     int k = MIN2(max, index);
 407     jint  new_ind = index - k;
 408     for (int j = 0; j < k; j++) {
 409       ptr_arr[j] = _base[new_ind + j];
 410     }
 411     _index = new_ind;
 412     *n = k;
 413     return true;
 414   }
 415 }
 416 
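// Drains the stack, applying cl to the fields of each popped object. If
// yield_after is set, a yield check is performed after each object and the
// drain returns false if it was interrupted.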
 417 template<class OopClosureClass>
 418 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 419   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 420          || SafepointSynchronize::is_at_safepoint(),
 421          "Drain recursion must be yield-safe.");
 422   bool res = true;
 423   debug_only(_drain_in_progress = true);
 424   debug_only(_drain_in_progress_yields = yield_after);
 425   while (!isEmpty()) {
 426     oop newOop = pop();
 427     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 428     assert(newOop->is_oop(), "Expected an oop");
 429     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 430            "only grey objects on this stack");
 431     newOop->oop_iterate(cl);
 432     if (yield_after && _cm->do_yield_check()) {
 433       res = false;
 434       break;
 435     }
 436   }
 437   debug_only(_drain_in_progress = false);
 438   return res;
 439 }
 440 
 441 void CMMarkStack::note_start_of_gc() {
 442   assert(_saved_index == -1,
 443          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 444   _saved_index = _index;
 445 }
 446 
 447 void CMMarkStack::note_end_of_gc() {
 448   // This is intentionally a guarantee, instead of an assert. If we
 449   // accidentally add something to the mark stack during GC, it
  450   // will be a correctness issue so it's better if we crash. We'll
 451   // only check this once per GC anyway, so it won't be a performance
 452   // issue in any way.
 453   guarantee(_saved_index == _index,
 454             err_msg("saved index: %d index: %d", _saved_index, _index));
 455   _saved_index = -1;
 456 }
 457 
 458 void CMMarkStack::oops_do(OopClosure* f) {
 459   assert(_saved_index == _index,
 460          err_msg("saved index: %d index: %d", _saved_index, _index));
 461   for (int i = 0; i < _index; i += 1) {
 462     f->do_oop(&_base[i]);
 463   }
 464 }
 465 
 466 CMRootRegions::CMRootRegions() :
 467   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 468   _should_abort(false),  _next_survivor(NULL) { }
 469 
 470 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 471   _young_list = g1h->young_list();
 472   _cm = cm;
 473 }
 474 
 475 void CMRootRegions::prepare_for_scan() {
 476   assert(!scan_in_progress(), "pre-condition");
 477 
 478   // Currently, only survivors can be root regions.
 479   assert(_next_survivor == NULL, "pre-condition");
 480   _next_survivor = _young_list->first_survivor_region();
 481   _scan_in_progress = (_next_survivor != NULL);
 482   _should_abort = false;
 483 }
 484 
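// Claims the next root region to scan, or returns NULL if there are none left
// (or an abort has been requested). Safe to call from multiple workers: the
// survivor cursor is re-read and advanced under RootRegionScan_lock.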
 485 HeapRegion* CMRootRegions::claim_next() {
 486   if (_should_abort) {
 487     // If someone has set the should_abort flag, we return NULL to
 488     // force the caller to bail out of their loop.
 489     return NULL;
 490   }
 491 
 492   // Currently, only survivors can be root regions.
 493   HeapRegion* res = _next_survivor;
 494   if (res != NULL) {
 495     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 496     // Read it again in case it changed while we were waiting for the lock.
 497     res = _next_survivor;
 498     if (res != NULL) {
 499       if (res == _young_list->last_survivor_region()) {
 500         // We just claimed the last survivor so store NULL to indicate
 501         // that we're done.
 502         _next_survivor = NULL;
 503       } else {
 504         _next_survivor = res->get_next_young_region();
 505       }
 506     } else {
 507       // Someone else claimed the last survivor while we were trying
 508       // to take the lock so nothing else to do.
 509     }
 510   }
 511   assert(res == NULL || res->is_survivor(), "post-condition");
 512 
 513   return res;
 514 }
 515 
 516 void CMRootRegions::scan_finished() {
 517   assert(scan_in_progress(), "pre-condition");
 518 
 519   // Currently, only survivors can be root regions.
 520   if (!_should_abort) {
 521     assert(_next_survivor == NULL, "we should have claimed all survivors");
 522   }
 523   _next_survivor = NULL;
 524 
 525   {
 526     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 527     _scan_in_progress = false;
 528     RootRegionScan_lock->notify_all();
 529   }
 530 }
 531 
 532 bool CMRootRegions::wait_until_scan_finished() {
 533   if (!scan_in_progress()) return false;
 534 
 535   {
 536     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 537     while (scan_in_progress()) {
 538       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 539     }
 540   }
 541   return true;
 542 }
 543 
 544 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 545 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 546 #endif // _MSC_VER
 547 
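// Scales the number of concurrent marking threads off the number of parallel
// GC threads: roughly one marking thread for every four parallel threads,
// and always at least one.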
 548 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 549   return MAX2((n_par_threads + 2) / 4, 1U);
 550 }
 551 
 552 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 553   _g1h(g1h),
 554   _markBitMap1(),
 555   _markBitMap2(),
 556   _parallel_marking_threads(0),
 557   _max_parallel_marking_threads(0),
 558   _sleep_factor(0.0),
 559   _marking_task_overhead(1.0),
 560   _cleanup_sleep_factor(0.0),
 561   _cleanup_task_overhead(1.0),
 562   _cleanup_list("Cleanup List"),
 563   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 564   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 565             CardTableModRefBS::card_shift,
 566             false /* in_resource_area*/),
 567 
 568   _prevMarkBitMap(&_markBitMap1),
 569   _nextMarkBitMap(&_markBitMap2),
 570 
 571   _markStack(this),
 572   // _finger set in set_non_marking_state
 573 
 574   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
 575   // _active_tasks set in set_non_marking_state
 576   // _tasks set inside the constructor
 577   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 578   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 579 
 580   _has_overflown(false),
 581   _concurrent(false),
 582   _has_aborted(false),
 583   _aborted_gc_id(GCId::undefined()),
 584   _restart_for_overflow(false),
 585   _concurrent_marking_in_progress(false),
 586 
 587   // _verbose_level set below
 588 
 589   _init_times(),
 590   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 591   _cleanup_times(),
 592   _total_counting_time(0.0),
 593   _total_rs_scrub_time(0.0),
 594 
 595   _parallel_workers(NULL),
 596 
 597   _count_card_bitmaps(NULL),
 598   _count_marked_bytes(NULL),
 599   _completed_initialization(false) {
 600   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 601   if (verbose_level < no_verbose) {
 602     verbose_level = no_verbose;
 603   }
 604   if (verbose_level > high_verbose) {
 605     verbose_level = high_verbose;
 606   }
 607   _verbose_level = verbose_level;
 608 
 609   if (verbose_low()) {
 610     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 611                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 612   }
 613 
 614   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 615   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 616 
 617   // Create & start a ConcurrentMark thread.
 618   _cmThread = new ConcurrentMarkThread(this);
 619   assert(cmThread() != NULL, "CM Thread should have been created");
 620   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 621   if (_cmThread->osthread() == NULL) {
 622       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 623   }
 624 
 625   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 626   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 627   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 628 
 629   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 630   satb_qs.set_buffer_size(G1SATBBufferSize);
 631 
 632   _root_regions.init(_g1h, this);
 633 
 634   if (ConcGCThreads > ParallelGCThreads) {
 635     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 636             "than ParallelGCThreads (" UINTX_FORMAT ").",
 637             ConcGCThreads, ParallelGCThreads);
 638     return;
 639   }
 640   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 641     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 642     // if both are set
 643     _sleep_factor             = 0.0;
 644     _marking_task_overhead    = 1.0;
 645   } else if (G1MarkingOverheadPercent > 0) {
 646     // We will calculate the number of parallel marking threads based
 647     // on a target overhead with respect to the soft real-time goal
 648     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 649     double overall_cm_overhead =
 650       (double) MaxGCPauseMillis * marking_overhead /
 651       (double) GCPauseIntervalMillis;
 652     double cpu_ratio = 1.0 / (double) os::processor_count();
 653     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 654     double marking_task_overhead =
 655       overall_cm_overhead / marking_thread_num *
 656                                               (double) os::processor_count();
 657     double sleep_factor =
 658                        (1.0 - marking_task_overhead) / marking_task_overhead;
 659 
 660     FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 661     _sleep_factor             = sleep_factor;
 662     _marking_task_overhead    = marking_task_overhead;
 663   } else {
 664     // Calculate the number of parallel marking threads by scaling
 665     // the number of parallel GC threads.
 666     uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 667     FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 668     _sleep_factor             = 0.0;
 669     _marking_task_overhead    = 1.0;
 670   }
 671 
 672   assert(ConcGCThreads > 0, "Should have been set");
 673   _parallel_marking_threads = (uint) ConcGCThreads;
 674   _max_parallel_marking_threads = _parallel_marking_threads;
 675 
 676   if (parallel_marking_threads() > 1) {
 677     _cleanup_task_overhead = 1.0;
 678   } else {
 679     _cleanup_task_overhead = marking_task_overhead();
 680   }
 681   _cleanup_sleep_factor =
 682                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 683 
 684 #if 0
 685   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 686   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 687   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 688   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 689   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 690 #endif
 691 
 692   _parallel_workers = new FlexibleWorkGang("G1 Marker",
 693        _max_parallel_marking_threads, false, true);
 694   if (_parallel_workers == NULL) {
 695     vm_exit_during_initialization("Failed necessary allocation.");
 696   } else {
 697     _parallel_workers->initialize_workers();
 698   }
 699 
 700   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 701     size_t mark_stack_size =
 702       MIN2(MarkStackSizeMax,
 703           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 704     // Verify that the calculated value for MarkStackSize is in range.
 705     // It would be nice to use the private utility routine from Arguments.
 706     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 707       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 708               "must be between 1 and " SIZE_FORMAT,
 709               mark_stack_size, MarkStackSizeMax);
 710       return;
 711     }
 712     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 713   } else {
 714     // Verify MarkStackSize is in range.
 715     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 716       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 717         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 718           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 719                   "must be between 1 and " SIZE_FORMAT,
 720                   MarkStackSize, MarkStackSizeMax);
 721           return;
 722         }
 723       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 724         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 725           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 726                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 727                   MarkStackSize, MarkStackSizeMax);
 728           return;
 729         }
 730       }
 731     }
 732   }
 733 
 734   if (!_markStack.allocate(MarkStackSize)) {
 735     warning("Failed to allocate CM marking stack");
 736     return;
 737   }
 738 
 739   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 740   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 741 
 742   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 743   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 744 
 745   BitMap::idx_t card_bm_size = _card_bm.size();
 746 
 747   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 748   _active_tasks = _max_worker_id;
 749 
 750   uint max_regions = _g1h->max_regions();
 751   for (uint i = 0; i < _max_worker_id; ++i) {
 752     CMTaskQueue* task_queue = new CMTaskQueue();
 753     task_queue->initialize();
 754     _task_queues->register_queue(i, task_queue);
 755 
 756     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 757     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 758 
 759     _tasks[i] = new CMTask(i, this,
 760                            _count_marked_bytes[i],
 761                            &_count_card_bitmaps[i],
 762                            task_queue, _task_queues);
 763 
 764     _accum_task_vtime[i] = 0.0;
 765   }
 766 
 767   // Calculate the card number for the bottom of the heap. Used
 768   // in biasing indexes into the accounting card bitmaps.
 769   _heap_bottom_card_num =
 770     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 771                                 CardTableModRefBS::card_shift);
 772 
 773   // Clear all the liveness counting data
 774   clear_all_count_data();
 775 
 776   // so that the call below can read a sensible value
 777   _heap_start = g1h->reserved_region().start();
 778   set_non_marking_state();
 779   _completed_initialization = true;
 780 }
 781 
 782 void ConcurrentMark::reset() {
 783   // Starting values for these two. This should be called in a STW
 784   // phase.
 785   MemRegion reserved = _g1h->g1_reserved();
 786   _heap_start = reserved.start();
 787   _heap_end   = reserved.end();
 788 
 789   // Separated the asserts so that we know which one fires.
 790   assert(_heap_start != NULL, "heap bounds should look ok");
 791   assert(_heap_end != NULL, "heap bounds should look ok");
 792   assert(_heap_start < _heap_end, "heap bounds should look ok");
 793 
 794   // Reset all the marking data structures and any necessary flags
 795   reset_marking_state();
 796 
 797   if (verbose_low()) {
 798     gclog_or_tty->print_cr("[global] resetting");
 799   }
 800 
 801   // We do reset all of them, since different phases will use
 802   // different number of active threads. So, it's easiest to have all
 803   // of them ready.
 804   for (uint i = 0; i < _max_worker_id; ++i) {
 805     _tasks[i]->reset(_nextMarkBitMap);
 806   }
 807 
 808   // we need this to make sure that the flag is on during the evac
 809   // pause with initial mark piggy-backed
 810   set_concurrent_marking_in_progress();
 811 }
 812 
 813 
 814 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 815   _markStack.set_should_expand();
 816   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 817   if (clear_overflow) {
 818     clear_has_overflown();
 819   } else {
 820     assert(has_overflown(), "pre-condition");
 821   }
 822   _finger = _heap_start;
 823 
 824   for (uint i = 0; i < _max_worker_id; ++i) {
 825     CMTaskQueue* queue = _task_queues->queue(i);
 826     queue->set_empty();
 827   }
 828 }
 829 
 830 void ConcurrentMark::set_concurrency(uint active_tasks) {
 831   assert(active_tasks <= _max_worker_id, "we should not have more");
 832 
 833   _active_tasks = active_tasks;
 834   // Need to update the three data structures below according to the
 835   // number of active threads for this phase.
 836   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 837   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 838   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 839 }
 840 
 841 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 842   set_concurrency(active_tasks);
 843 
 844   _concurrent = concurrent;
 845   // We propagate this to all tasks, not just the active ones.
 846   for (uint i = 0; i < _max_worker_id; ++i)
 847     _tasks[i]->set_concurrent(concurrent);
 848 
 849   if (concurrent) {
 850     set_concurrent_marking_in_progress();
 851   } else {
 852     // We currently assume that the concurrent flag has been set to
 853     // false before we start remark. At this point we should also be
 854     // in a STW phase.
 855     assert(!concurrent_marking_in_progress(), "invariant");
 856     assert(out_of_regions(),
 857            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
 858                    p2i(_finger), p2i(_heap_end)));
 859   }
 860 }
 861 
 862 void ConcurrentMark::set_non_marking_state() {
 863   // We set the global marking state to some default values when we're
 864   // not doing marking.
 865   reset_marking_state();
 866   _active_tasks = 0;
 867   clear_concurrent_marking_in_progress();
 868 }
 869 
 870 ConcurrentMark::~ConcurrentMark() {
 871   // The ConcurrentMark instance is never freed.
 872   ShouldNotReachHere();
 873 }
 874 
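// Clears the next marking bitmap in parallel. The clearing closure may yield,
// so the iteration can be aborted if marking is aborted while we yield; in
// that case the liveness counting data is left for abort() to clear.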
 875 void ConcurrentMark::clearNextBitmap() {
 876   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 877 
  878   // Make sure that the concurrent mark thread still appears to be
  879   // within the current cycle.
 880   guarantee(cmThread()->during_cycle(), "invariant");
 881 
 882   // We are finishing up the current cycle by clearing the next
 883   // marking bitmap and getting it ready for the next cycle. During
 884   // this time no other cycle can start. So, let's make sure that this
 885   // is the case.
 886   guarantee(!g1h->mark_in_progress(), "invariant");
 887 
 888   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 889   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 890   _parallel_workers->run_task(&task);
 891 
 892   // Clear the liveness counting data. If the marking has been aborted, the abort()
 893   // call already did that.
 894   if (cl.complete()) {
 895     clear_all_count_data();
 896   }
 897 
 898   // Repeat the asserts from above.
 899   guarantee(cmThread()->during_cycle(), "invariant");
 900   guarantee(!g1h->mark_in_progress(), "invariant");
 901 }
 902 
 903 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 904   CMBitMap* _bitmap;
 905   bool _error;
 906  public:
 907   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 908   }
 909 
 910   virtual bool doHeapRegion(HeapRegion* r) {
  911     // This closure can be called concurrently with the mutator, so we must make sure
 912     // that the result of the getNextMarkedWordAddress() call is compared to the
 913     // value passed to it as limit to detect any found bits.
 914     // We can use the region's orig_end() for the limit and the comparison value
 915     // as it always contains the "real" end of the region that never changes and
 916     // has no side effects.
 917     // Due to the latter, there can also be no problem with the compiler generating
 918     // reloads of the orig_end() call.
 919     HeapWord* end = r->orig_end();
 920     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 921   }
 922 };
 923 
 924 bool ConcurrentMark::nextMarkBitmapIsClear() {
 925   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 926   _g1h->heap_region_iterate(&cl);
 927   return cl.complete();
 928 }
 929 
 930 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 931 public:
 932   bool doHeapRegion(HeapRegion* r) {
 933     if (!r->is_continues_humongous()) {
 934       r->note_start_of_marking();
 935     }
 936     return false;
 937   }
 938 };
 939 
 940 void ConcurrentMark::checkpointRootsInitialPre() {
 941   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 942   G1CollectorPolicy* g1p = g1h->g1_policy();
 943 
 944   _has_aborted = false;
 945 
 946 #ifndef PRODUCT
 947   if (G1PrintReachableAtInitialMark) {
 948     print_reachable("at-cycle-start",
 949                     VerifyOption_G1UsePrevMarking, true /* all */);
 950   }
 951 #endif
 952 
 953   // Initialize marking structures. This has to be done in a STW phase.
 954   reset();
 955 
 956   // For each region note start of marking.
 957   NoteStartOfMarkHRClosure startcl;
 958   g1h->heap_region_iterate(&startcl);
 959 }
 960 
 961 
 962 void ConcurrentMark::checkpointRootsInitialPost() {
 963   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 964 
 965   // If we force an overflow during remark, the remark operation will
 966   // actually abort and we'll restart concurrent marking. If we always
 967   // force an overflow during remark we'll never actually complete the
 968   // marking phase. So, we initialize this here, at the start of the
  969   // cycle, so that the remaining overflow number will decrease at
 970   // every remark and we'll eventually not need to cause one.
 971   force_overflow_stw()->init();
 972 
 973   // Start Concurrent Marking weak-reference discovery.
 974   ReferenceProcessor* rp = g1h->ref_processor_cm();
 975   // enable ("weak") refs discovery
 976   rp->enable_discovery();
 977   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 978 
 979   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  980   // This is the start of the marking cycle; we expect all
  981   // threads to have SATB queues with active set to false.
 982   satb_mq_set.set_active_all_threads(true, /* new active value */
 983                                      false /* expected_active */);
 984 
 985   _root_regions.prepare_for_scan();
 986 
 987   // update_g1_committed() will be called at the end of an evac pause
 988   // when marking is on. So, it's also called at the end of the
 989   // initial-mark pause to update the heap end, if the heap expands
 990   // during it. No need to call it here.
 991 }
 992 
 993 /*
 994  * Notice that in the next two methods, we actually leave the STS
 995  * during the barrier sync and join it immediately afterwards. If we
 996  * do not do this, the following deadlock can occur: one thread could
 997  * be in the barrier sync code, waiting for the other thread to also
 998  * sync up, whereas another one could be trying to yield, while also
 999  * waiting for the other threads to sync up too.
1000  *
1001  * Note, however, that this code is also used during remark and in
1002  * this case we should not attempt to leave / enter the STS, otherwise
1003  * we'll either hit an assert (debug / fastdebug) or deadlock
1004  * (product). So we should only leave / enter the STS if we are
1005  * operating concurrently.
1006  *
1007  * Because the thread that does the sync barrier has left the STS, it
 1008  * is possible that it will be suspended for a Full GC or that an
 1009  * evacuation pause could occur. This is actually safe, since entering
 1010  * the sync barrier is one of the last things do_marking_step() does,
 1011  * and it doesn't manipulate any data structures afterwards.
1012  */
1013 
1014 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
1015   if (verbose_low()) {
1016     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
1017   }
1018 
1019   if (concurrent()) {
1020     SuspendibleThreadSet::leave();
1021   }
1022 
1023   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
1024 
1025   if (concurrent()) {
1026     SuspendibleThreadSet::join();
1027   }
1028   // at this point everyone should have synced up and not be doing any
1029   // more work
1030 
1031   if (verbose_low()) {
1032     if (barrier_aborted) {
1033       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
1034     } else {
1035       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
1036     }
1037   }
1038 
1039   if (barrier_aborted) {
1040     // If the barrier aborted we ignore the overflow condition and
1041     // just abort the whole marking phase as quickly as possible.
1042     return;
1043   }
1044 
1045   // If we're executing the concurrent phase of marking, reset the marking
1046   // state; otherwise the marking state is reset after reference processing,
1047   // during the remark pause.
1048   // If we reset here as a result of an overflow during the remark we will
1049   // see assertion failures from any subsequent set_concurrency_and_phase()
1050   // calls.
1051   if (concurrent()) {
 1052     // let the task associated with worker 0 do this
1053     if (worker_id == 0) {
1054       // task 0 is responsible for clearing the global data structures
1055       // We should be here because of an overflow. During STW we should
1056       // not clear the overflow flag since we rely on it being true when
1057       // we exit this method to abort the pause and restart concurrent
1058       // marking.
1059       reset_marking_state(true /* clear_overflow */);
1060       force_overflow()->update();
1061 
1062       if (G1Log::fine()) {
1063         gclog_or_tty->gclog_stamp(concurrent_gc_id());
1064         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1065       }
1066     }
1067   }
1068 
 1069   // after this, each task should reset its own data structures and
 1070   // then go into the second barrier
1071 }
1072 
1073 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1074   if (verbose_low()) {
1075     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1076   }
1077 
1078   if (concurrent()) {
1079     SuspendibleThreadSet::leave();
1080   }
1081 
1082   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1083 
1084   if (concurrent()) {
1085     SuspendibleThreadSet::join();
1086   }
1087   // at this point everything should be re-initialized and ready to go
1088 
1089   if (verbose_low()) {
1090     if (barrier_aborted) {
1091       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1092     } else {
1093       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1094     }
1095   }
1096 }
1097 
1098 #ifndef PRODUCT
1099 void ForceOverflowSettings::init() {
1100   _num_remaining = G1ConcMarkForceOverflow;
1101   _force = false;
1102   update();
1103 }
1104 
1105 void ForceOverflowSettings::update() {
1106   if (_num_remaining > 0) {
1107     _num_remaining -= 1;
1108     _force = true;
1109   } else {
1110     _force = false;
1111   }
1112 }
1113 
1114 bool ForceOverflowSettings::should_force() {
1115   if (_force) {
1116     _force = false;
1117     return true;
1118   } else {
1119     return false;
1120   }
1121 }
1122 #endif // !PRODUCT
1123 
1124 class CMConcurrentMarkingTask: public AbstractGangTask {
1125 private:
1126   ConcurrentMark*       _cm;
1127   ConcurrentMarkThread* _cmt;
1128 
1129 public:
1130   void work(uint worker_id) {
1131     assert(Thread::current()->is_ConcurrentGC_thread(),
1132            "this should only be done by a conc GC thread");
1133     ResourceMark rm;
1134 
1135     double start_vtime = os::elapsedVTime();
1136 
1137     SuspendibleThreadSet::join();
1138 
1139     assert(worker_id < _cm->active_tasks(), "invariant");
1140     CMTask* the_task = _cm->task(worker_id);
1141     the_task->record_start_time();
1142     if (!_cm->has_aborted()) {
1143       do {
1144         double start_vtime_sec = os::elapsedVTime();
1145         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1146 
1147         the_task->do_marking_step(mark_step_duration_ms,
1148                                   true  /* do_termination */,
1149                                   false /* is_serial*/);
1150 
1151         double end_vtime_sec = os::elapsedVTime();
1152         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1153         _cm->clear_has_overflown();
1154 
1155         _cm->do_yield_check(worker_id);
1156 
1157         jlong sleep_time_ms;
1158         if (!_cm->has_aborted() && the_task->has_aborted()) {
1159           sleep_time_ms =
1160             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1161           SuspendibleThreadSet::leave();
1162           os::sleep(Thread::current(), sleep_time_ms, false);
1163           SuspendibleThreadSet::join();
1164         }
1165       } while (!_cm->has_aborted() && the_task->has_aborted());
1166     }
1167     the_task->record_end_time();
1168     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1169 
1170     SuspendibleThreadSet::leave();
1171 
1172     double end_vtime = os::elapsedVTime();
1173     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1174   }
1175 
1176   CMConcurrentMarkingTask(ConcurrentMark* cm,
1177                           ConcurrentMarkThread* cmt) :
1178       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1179 
1180   ~CMConcurrentMarkingTask() { }
1181 };
1182 
1183 // Calculates the number of active workers for a concurrent
1184 // phase.
1185 uint ConcurrentMark::calc_parallel_marking_threads() {
1186   uint n_conc_workers = 0;
1187   if (!UseDynamicNumberOfGCThreads ||
1188       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1189        !ForceDynamicNumberOfGCThreads)) {
1190     n_conc_workers = max_parallel_marking_threads();
1191   } else {
1192     n_conc_workers =
1193       AdaptiveSizePolicy::calc_default_active_workers(
1194                                    max_parallel_marking_threads(),
1195                                    1, /* Minimum workers */
1196                                    parallel_marking_threads(),
1197                                    Threads::number_of_non_daemon_threads());
1198     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1199     // that scaling has already gone into "_max_parallel_marking_threads".
1200   }
1201   assert(n_conc_workers > 0, "Always need at least 1");
1202   return n_conc_workers;
1203 }
1204 
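// Scans a single root region: iterates over all objects between bottom() and
// top(), applying G1RootRegionScanClosure to each object's fields.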
1205 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1206   // Currently, only survivors can be root regions.
1207   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1208   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1209 
1210   const uintx interval = PrefetchScanIntervalInBytes;
1211   HeapWord* curr = hr->bottom();
1212   const HeapWord* end = hr->top();
1213   while (curr < end) {
1214     Prefetch::read(curr, interval);
1215     oop obj = oop(curr);
1216     int size = obj->oop_iterate(&cl);
1217     assert(size == obj->size(), "sanity");
1218     curr += size;
1219   }
1220 }
1221 
1222 class CMRootRegionScanTask : public AbstractGangTask {
1223 private:
1224   ConcurrentMark* _cm;
1225 
1226 public:
1227   CMRootRegionScanTask(ConcurrentMark* cm) :
1228     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1229 
1230   void work(uint worker_id) {
1231     assert(Thread::current()->is_ConcurrentGC_thread(),
1232            "this should only be done by a conc GC thread");
1233 
1234     CMRootRegions* root_regions = _cm->root_regions();
1235     HeapRegion* hr = root_regions->claim_next();
1236     while (hr != NULL) {
1237       _cm->scanRootRegion(hr, worker_id);
1238       hr = root_regions->claim_next();
1239     }
1240   }
1241 };
1242 
1243 void ConcurrentMark::scanRootRegions() {
1244   // Start of concurrent marking.
1245   ClassLoaderDataGraph::clear_claimed_marks();
1246 
1247   // scan_in_progress() will have been set to true only if there was
1248   // at least one root region to scan. So, if it's false, we
1249   // should not attempt to do any further work.
1250   if (root_regions()->scan_in_progress()) {
1251     _parallel_marking_threads = calc_parallel_marking_threads();
1252     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1253            "Maximum number of marking threads exceeded");
1254     uint active_workers = MAX2(1U, parallel_marking_threads());
1255 
1256     CMRootRegionScanTask task(this);
1257     _parallel_workers->set_active_workers(active_workers);
1258     _parallel_workers->run_task(&task);
1259 
1260     // It's possible that has_aborted() is true here without actually
1261     // aborting the survivor scan earlier. This is OK as it's
1262     // mainly used for sanity checking.
1263     root_regions()->scan_finished();
1264   }
1265 }
1266 
1267 void ConcurrentMark::markFromRoots() {
1268   // we might be tempted to assert that:
1269   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1270   //        "inconsistent argument?");
1271   // However that wouldn't be right, because it's possible that
1272   // a safepoint is indeed in progress as a younger generation
1273   // stop-the-world GC happens even as we mark in this generation.
1274 
1275   _restart_for_overflow = false;
1276   force_overflow_conc()->init();
1277 
1278   // _g1h has _n_par_threads
1279   _parallel_marking_threads = calc_parallel_marking_threads();
1280   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1281     "Maximum number of marking threads exceeded");
1282 
1283   uint active_workers = MAX2(1U, parallel_marking_threads());
1284 
1285   // Parallel task terminator is set in "set_concurrency_and_phase()"
1286   set_concurrency_and_phase(active_workers, true /* concurrent */);
1287 
1288   CMConcurrentMarkingTask markingTask(this, cmThread());
1289   _parallel_workers->set_active_workers(active_workers);
1290   // Don't set _n_par_threads because it affects MT in process_roots()
1291   // and the decisions on that MT processing is made elsewhere.
1292   assert(_parallel_workers->active_workers() > 0, "Should have been set");
1293   _parallel_workers->run_task(&markingTask);
1294   print_stats();
1295 }
1296 
1297 // Helper class to get rid of some boilerplate code.
1298 class G1CMTraceTime : public GCTraceTime {
1299   static bool doit_and_prepend(bool doit) {
1300     if (doit) {
1301       gclog_or_tty->put(' ');
1302     }
1303     return doit;
1304   }
1305 
1306  public:
1307   G1CMTraceTime(const char* title, bool doit)
1308     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
1309         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
1310   }
1311 };
1312 
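// The Remark pause: runs at a safepoint, finishes the marking work, processes
// weak references, and then either completes the marking cycle or sets
// _restart_for_overflow if the global mark stack overflowed.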
1313 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1314   // world is stopped at this checkpoint
1315   assert(SafepointSynchronize::is_at_safepoint(),
1316          "world should be stopped");
1317 
1318   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1319 
1320   // If a full collection has happened, we shouldn't do this.
1321   if (has_aborted()) {
1322     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1323     return;
1324   }
1325 
1326   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1327 
1328   if (VerifyDuringGC) {
1329     HandleMark hm;  // handle scope
1330     g1h->prepare_for_verify();
1331     Universe::verify(VerifyOption_G1UsePrevMarking,
1332                      " VerifyDuringGC:(before)");
1333   }
1334   g1h->check_bitmaps("Remark Start");
1335 
1336   G1CollectorPolicy* g1p = g1h->g1_policy();
1337   g1p->record_concurrent_mark_remark_start();
1338 
1339   double start = os::elapsedTime();
1340 
1341   checkpointRootsFinalWork();
1342 
1343   double mark_work_end = os::elapsedTime();
1344 
1345   weakRefsWork(clear_all_soft_refs);
1346 
1347   if (has_overflown()) {
1348     // Oops.  We overflowed.  Restart concurrent marking.
1349     _restart_for_overflow = true;
1350     if (G1TraceMarkStackOverflow) {
1351       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1352     }
1353 
1354     // Verify the heap w.r.t. the previous marking bitmap.
1355     if (VerifyDuringGC) {
1356       HandleMark hm;  // handle scope
1357       g1h->prepare_for_verify();
1358       Universe::verify(VerifyOption_G1UsePrevMarking,
1359                        " VerifyDuringGC:(overflow)");
1360     }
1361 
1362     // Clear the marking state because we will be restarting
1363     // marking due to overflowing the global mark stack.
1364     reset_marking_state();
1365   } else {
1366     {
1367       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1368 
1369       // Aggregate the per-task counting data that we have accumulated
1370       // while marking.
1371       aggregate_count_data();
1372     }
1373 
1374     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1375     // We're done with marking.
 1376     // This is the end of the marking cycle; we expect all
 1377     // threads to have SATB queues with active set to true.
1378     satb_mq_set.set_active_all_threads(false, /* new active value */
1379                                        true /* expected_active */);
1380 
1381     if (VerifyDuringGC) {
1382       HandleMark hm;  // handle scope
1383       g1h->prepare_for_verify();
1384       Universe::verify(VerifyOption_G1UseNextMarking,
1385                        " VerifyDuringGC:(after)");
1386     }
1387     g1h->check_bitmaps("Remark End");
1388     assert(!restart_for_overflow(), "sanity");
1389     // Completely reset the marking state since marking completed
1390     set_non_marking_state();
1391   }
1392 
1393   // Expand the marking stack, if we have to and if we can.
1394   if (_markStack.should_expand()) {
1395     _markStack.expand();
1396   }
1397 
1398   // Statistics
1399   double now = os::elapsedTime();
1400   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1401   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1402   _remark_times.add((now - start) * 1000.0);
1403 
1404   g1p->record_concurrent_mark_remark_end();
1405 
1406   G1CMIsAliveClosure is_alive(g1h);
1407   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1408 }
1409 
1410 // Base class of the closures that finalize and verify the
1411 // liveness counting data.
1412 class CMCountDataClosureBase: public HeapRegionClosure {
1413 protected:
1414   G1CollectedHeap* _g1h;
1415   ConcurrentMark* _cm;
1416   CardTableModRefBS* _ct_bs;
1417 
1418   BitMap* _region_bm;
1419   BitMap* _card_bm;
1420 
1421   // Takes a region that's not empty (i.e., it has at least one
 1422   // live object in it) and sets its corresponding bit on the region
1423   // bitmap to 1. If the region is "starts humongous" it will also set
1424   // to 1 the bits on the region bitmap that correspond to its
1425   // associated "continues humongous" regions.
1426   void set_bit_for_region(HeapRegion* hr) {
1427     assert(!hr->is_continues_humongous(), "should have filtered those out");
1428 
1429     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1430     if (!hr->is_starts_humongous()) {
1431       // Normal (non-humongous) case: just set the bit.
1432       _region_bm->par_at_put(index, true);
1433     } else {
1434       // Starts humongous case: calculate how many regions are part of
1435       // this humongous region and then set the bit range.
1436       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1437       _region_bm->par_at_put_range(index, end_index, true);
1438     }
1439   }
1440 
1441 public:
1442   CMCountDataClosureBase(G1CollectedHeap* g1h,
1443                          BitMap* region_bm, BitMap* card_bm):
1444     _g1h(g1h), _cm(g1h->concurrent_mark()),
1445     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1446     _region_bm(region_bm), _card_bm(card_bm) { }
1447 };
1448 
1449 // Closure that calculates the amount of live data (marked bytes) per
1450 // region. Used for verification purposes during the cleanup pause.
1451 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1452   CMBitMapRO* _bm;
1453   size_t _region_marked_bytes;
1454 
1455 public:
1456   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1457                          BitMap* region_bm, BitMap* card_bm) :
1458     CMCountDataClosureBase(g1h, region_bm, card_bm),
1459     _bm(bm), _region_marked_bytes(0) { }
1460 
1461   bool doHeapRegion(HeapRegion* hr) {
1462 
1463     if (hr->is_continues_humongous()) {
1464       // We will ignore these here and process them when their
1465       // associated "starts humongous" region is processed (see
1466       // set_bit_for_region()). Note that we cannot rely on their
1467       // associated "starts humongous" region to have its bit set to
1468       // 1 since, due to the region chunking in the parallel region
1469       // iteration, a "continues humongous" region might be visited
1470       // before its associated "starts humongous" region.
1471       return false;
1472     }
1473 
1474     HeapWord* ntams = hr->next_top_at_mark_start();
1475     HeapWord* start = hr->bottom();
1476 
1477     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1478            err_msg("Preconditions not met - "
1479                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1480                    p2i(start), p2i(ntams), p2i(hr->end())));
1481 
1482     // Find the first marked object at or after "start".
1483     start = _bm->getNextMarkedWordAddress(start, ntams);
1484 
1485     size_t marked_bytes = 0;
1486 
1487     while (start < ntams) {
1488       oop obj = oop(start);
1489       int obj_sz = obj->size();
1490       HeapWord* obj_end = start + obj_sz;
1491 
1492       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1493       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
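           // card_bitmap_index_for() yields the index of the card covering the
           // given address in the global card liveness bitmap (one bit per card,
           // counted from the bottom of the heap), so [start_idx, end_idx)
           // covers the cards spanned by this object once end_idx has been
           // adjusted below for a non-card-aligned obj_end.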
1494 
1495       // Note: if we're looking at the last region in the heap, obj_end
1496       // could actually be just beyond the end of the heap; end_idx
1497       // will then correspond to a (non-existent) card that is also
1498       // just beyond the heap.
1499       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1500         // end of object is not card aligned - increment to cover
1501         // all the cards spanned by the object
1502         end_idx += 1;
1503       }
1504 
1505       // Set the bits in the card BM for the cards spanned by this object.
1506       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1507 
1508       // Add the size of this object to the number of marked bytes.
1509       marked_bytes += (size_t)obj_sz * HeapWordSize;
1510 
1511       // Find the next marked object after this one.
1512       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1513     }
1514 
1515     // Mark the allocated-since-marking portion...
1516     HeapWord* top = hr->top();
1517     if (ntams < top) {
1518       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1519       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1520 
1521       // Note: if we're looking at the last region in the heap, top
1522       // could actually be just beyond the end of the heap; end_idx
1523       // will then correspond to a (non-existent) card that is also
1524       // just beyond the heap.
1525       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1526         // top is not card aligned - increment end_idx to cover
1527         // all the cards spanned by [ntams, top)
1528         end_idx += 1;
1529       }
1530       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1531 
1532       // This definitely means the region has live objects.
1533       set_bit_for_region(hr);
1534     }
1535 
1536     // Update the live region bitmap.
1537     if (marked_bytes > 0) {
1538       set_bit_for_region(hr);
1539     }
1540 
1541     // Set the marked bytes for the current region so that
1542     // it can be queried by a calling verification routine
1543     _region_marked_bytes = marked_bytes;
1544 
1545     return false;
1546   }
1547 
1548   size_t region_marked_bytes() const { return _region_marked_bytes; }
1549 };
1550 
1551 // Heap region closure used for verifying the counting data
1552 // that was accumulated concurrently and aggregated during
1553 // the remark pause. This closure is applied to the heap
1554 // regions during the STW cleanup pause.
1555 
1556 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1557   G1CollectedHeap* _g1h;
1558   ConcurrentMark* _cm;
1559   CalcLiveObjectsClosure _calc_cl;
1560   BitMap* _region_bm;   // Region BM to be verified
1561   BitMap* _card_bm;     // Card BM to be verified
1562   bool _verbose;        // verbose output?
1563 
1564   BitMap* _exp_region_bm; // Expected Region BM values
1565   BitMap* _exp_card_bm;   // Expected card BM values
1566 
1567   int _failures;
1568 
1569 public:
1570   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1571                                 BitMap* region_bm,
1572                                 BitMap* card_bm,
1573                                 BitMap* exp_region_bm,
1574                                 BitMap* exp_card_bm,
1575                                 bool verbose) :
1576     _g1h(g1h), _cm(g1h->concurrent_mark()),
1577     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1578     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1579     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1580     _failures(0) { }
1581 
1582   int failures() const { return _failures; }
1583 
1584   bool doHeapRegion(HeapRegion* hr) {
1585     if (hr->is_continues_humongous()) {
1586       // We will ignore these here and process them when their
1587       // associated "starts humongous" region is processed (see
1588       // set_bit_for_region()). Note that we cannot rely on their
1589       // associated "starts humongous" region to have its bit set to
1590       // 1 since, due to the region chunking in the parallel region
1591       // iteration, a "continues humongous" region might be visited
1592       // before its associated "starts humongous" region.
1593       return false;
1594     }
1595 
1596     int failures = 0;
1597 
1598     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1599     // this region and set the corresponding bits in the expected region
1600     // and card bitmaps.
1601     bool res = _calc_cl.doHeapRegion(hr);
1602     assert(res == false, "should be continuing");
1603 
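         // Take ParGCRareEvent_lock only when verbose output is enabled, so
         // that log lines from parallel workers do not interleave; passing a
         // NULL mutex to MutexLockerEx makes the locker a no-op.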
1604     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1605                     Mutex::_no_safepoint_check_flag);
1606 
1607     // Verify the marked bytes for this region.
1608     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1609     size_t act_marked_bytes = hr->next_marked_bytes();
1610 
1611     // We're not OK if expected marked bytes > actual marked bytes. It means
1612     // we have missed accounting some objects during the actual marking.
1613     if (exp_marked_bytes > act_marked_bytes) {
1614       if (_verbose) {
1615         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1616                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1617                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1618       }
1619       failures += 1;
1620     }
1621 
1622     // Verify the bit for this region in the actual region bitmap
1623     // and in the expected region bitmap that we just calculated.
1624     // We're not OK if the bit in the calculated expected region
1625     // bitmap is set and the bit in the actual region bitmap is not.
1626     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1627 
1628     bool expected = _exp_region_bm->at(index);
1629     bool actual = _region_bm->at(index);
1630     if (expected && !actual) {
1631       if (_verbose) {
1632         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1633                                "expected: %s, actual: %s",
1634                                hr->hrm_index(),
1635                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1636       }
1637       failures += 1;
1638     }
1639 
1640     // Verify that the card bit maps for the cards spanned by the current
1641     // region match. We have an error if we have a set bit in the expected
1642     // bit map and the corresponding bit in the actual bitmap is not set.
1643 
1644     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1645     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1646 
1647     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1648       expected = _exp_card_bm->at(i);
1649       actual = _card_bm->at(i);
1650 
1651       if (expected && !actual) {
1652         if (_verbose) {
1653           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1654                                  "expected: %s, actual: %s",
1655                                  hr->hrm_index(), i,
1656                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1657         }
1658         failures += 1;
1659       }
1660     }
1661 
1662     if (failures > 0 && _verbose)  {
1663       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1664                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1665                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1666                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1667     }
1668 
1669     _failures += failures;
1670 
1671     // We could stop iteration over the heap when we
1672     // find the first violating region by returning true.
1673     return false;
1674   }
1675 };
1676 
1677 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1678 protected:
1679   G1CollectedHeap* _g1h;
1680   ConcurrentMark* _cm;
1681   BitMap* _actual_region_bm;
1682   BitMap* _actual_card_bm;
1683 
1684   uint    _n_workers;
1685 
1686   BitMap* _expected_region_bm;
1687   BitMap* _expected_card_bm;
1688 
1689   int  _failures;
1690   bool _verbose;
1691 
1692   HeapRegionClaimer _hrclaimer;
1693 
1694 public:
1695   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1696                             BitMap* region_bm, BitMap* card_bm,
1697                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1698     : AbstractGangTask("G1 verify final counting"),
1699       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1700       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1701       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1702       _failures(0), _verbose(false),
1703       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1704     assert(VerifyDuringGC, "don't call this otherwise");
1705     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1706     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1707 
1708     _verbose = _cm->verbose_medium();
1709   }
1710 
1711   void work(uint worker_id) {
1712     assert(worker_id < _n_workers, "invariant");
1713 
1714     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1715                                             _actual_region_bm, _actual_card_bm,
1716                                             _expected_region_bm,
1717                                             _expected_card_bm,
1718                                             _verbose);
1719 
1720     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1721 
1722     Atomic::add(verify_cl.failures(), &_failures);
1723   }
1724 
1725   int failures() const { return _failures; }
1726 };
1727 
1728 // Closure that finalizes the liveness counting data.
1729 // Used during the cleanup pause.
1730 // Sets the bits corresponding to the interval [NTAMS, top)
1731 // (which contains the implicitly live objects) in the
1732 // card liveness bitmap. Also sets, in the region liveness
1733 // bitmap, the bit for each region that contains live data.
1734 
1735 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1736  public:
1737   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1738                               BitMap* region_bm,
1739                               BitMap* card_bm) :
1740     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1741 
1742   bool doHeapRegion(HeapRegion* hr) {
1743 
1744     if (hr->is_continues_humongous()) {
1745       // We will ignore these here and process them when their
1746       // associated "starts humongous" region is processed (see
1747       // set_bit_for_region()). Note that we cannot rely on their
1748       // associated "starts humongous" region to have its bit set to
1749       // 1 since, due to the region chunking in the parallel region
1750       // iteration, a "continues humongous" region might be visited
1751       // before its associated "starts humongous" region.
1752       return false;
1753     }
1754 
1755     HeapWord* ntams = hr->next_top_at_mark_start();
1756     HeapWord* top   = hr->top();
1757 
1758     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1759 
1760     // Mark the allocated-since-marking portion...
1761     if (ntams < top) {
1762       // This definitely means the region has live objects.
1763       set_bit_for_region(hr);
1764 
1765       // Now set the bits in the card bitmap for [ntams, top)
1766       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1767       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1768 
1769       // Note: if we're looking at the last region in the heap, top
1770       // could actually be just beyond the end of the heap; end_idx
1771       // will then correspond to a (non-existent) card that is also
1772       // just beyond the heap.
1773       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1774         // top is not card aligned - increment end_idx to cover
1775         // all the cards spanned by [ntams, top)
1776         end_idx += 1;
1777       }
1778 
1779       assert(end_idx <= _card_bm->size(),
1780              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1781                      end_idx, _card_bm->size()));
1782       assert(start_idx < _card_bm->size(),
1783              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1784                      start_idx, _card_bm->size()));
1785 
1786       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1787     }
1788 
1789     // Set the bit for the region if it contains live data
1790     if (hr->next_marked_bytes() > 0) {
1791       set_bit_for_region(hr);
1792     }
1793 
1794     return false;
1795   }
1796 };
1797 
1798 class G1ParFinalCountTask: public AbstractGangTask {
1799 protected:
1800   G1CollectedHeap* _g1h;
1801   ConcurrentMark* _cm;
1802   BitMap* _actual_region_bm;
1803   BitMap* _actual_card_bm;
1804 
1805   uint    _n_workers;
1806   HeapRegionClaimer _hrclaimer;
1807 
1808 public:
1809   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1810     : AbstractGangTask("G1 final counting"),
1811       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1812       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1813       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1814   }
1815 
1816   void work(uint worker_id) {
1817     assert(worker_id < _n_workers, "invariant");
1818 
1819     FinalCountDataUpdateClosure final_update_cl(_g1h,
1820                                                 _actual_region_bm,
1821                                                 _actual_card_bm);
1822 
1823     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1824   }
1825 };
1826 
1827 class G1ParNoteEndTask;
1828 
1829 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1830   G1CollectedHeap* _g1;
1831   size_t _max_live_bytes;
1832   uint _regions_claimed;
1833   size_t _freed_bytes;
1834   FreeRegionList* _local_cleanup_list;
1835   HeapRegionSetCount _old_regions_removed;
1836   HeapRegionSetCount _humongous_regions_removed;
1837   HRRSCleanupTask* _hrrs_cleanup_task;
1838   double _claimed_region_time;
1839   double _max_region_time;
1840 
1841 public:
1842   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1843                              FreeRegionList* local_cleanup_list,
1844                              HRRSCleanupTask* hrrs_cleanup_task) :
1845     _g1(g1),
1846     _max_live_bytes(0), _regions_claimed(0),
1847     _freed_bytes(0),
1848     _claimed_region_time(0.0), _max_region_time(0.0),
1849     _local_cleanup_list(local_cleanup_list),
1850     _old_regions_removed(),
1851     _humongous_regions_removed(),
1852     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1853 
1854   size_t freed_bytes() { return _freed_bytes; }
1855   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1856   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1857 
1858   bool doHeapRegion(HeapRegion *hr) {
1859     if (hr->is_continues_humongous()) {
1860       return false;
1861     }
1864     _g1->reset_gc_time_stamps(hr);
1865     double start = os::elapsedTime();
1866     _regions_claimed++;
1867     hr->note_end_of_marking();
1868     _max_live_bytes += hr->max_live_bytes();
1869 
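         // A non-young region that has allocated space but no live data can
         // be reclaimed (freed) right away; otherwise we just clean up its
         // remembered set.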
1870     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1871       _freed_bytes += hr->used();
1872       hr->set_containing_set(NULL);
1873       if (hr->is_humongous()) {
1874         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1875         _humongous_regions_removed.increment(1u, hr->capacity());
1876         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1877       } else {
1878         _old_regions_removed.increment(1u, hr->capacity());
1879         _g1->free_region(hr, _local_cleanup_list, true);
1880       }
1881     } else {
1882       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1883     }
1884 
1885     double region_time = (os::elapsedTime() - start);
1886     _claimed_region_time += region_time;
1887     if (region_time > _max_region_time) {
1888       _max_region_time = region_time;
1889     }
1890     return false;
1891   }
1892 
1893   size_t max_live_bytes() { return _max_live_bytes; }
1894   uint regions_claimed() { return _regions_claimed; }
1895   double claimed_region_time_sec() { return _claimed_region_time; }
1896   double max_region_time_sec() { return _max_region_time; }
1897 };
1898 
1899 class G1ParNoteEndTask: public AbstractGangTask {
1900   friend class G1NoteEndOfConcMarkClosure;
1901 
1902 protected:
1903   G1CollectedHeap* _g1h;
1904   size_t _max_live_bytes;
1905   size_t _freed_bytes;
1906   FreeRegionList* _cleanup_list;
1907   HeapRegionClaimer _hrclaimer;
1908 
1909 public:
1910   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1911       AbstractGangTask("G1 note end"), _g1h(g1h), _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1912   }
1913 
1914   void work(uint worker_id) {
1915     FreeRegionList local_cleanup_list("Local Cleanup List");
1916     HRRSCleanupTask hrrs_cleanup_task;
1917     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1918                                            &hrrs_cleanup_task);
1919     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1920     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1921 
1922     // Now update the lists
1923     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1924     {
1925       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1926       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1927       _max_live_bytes += g1_note_end.max_live_bytes();
1928       _freed_bytes += g1_note_end.freed_bytes();
1929 
1930       // If we iterate over the global cleanup list at the end of
1931       // cleanup to do this printing we cannot guarantee that we only
1932       // generate output for the newly-reclaimed regions (the list
1933       // might not be empty at the beginning of cleanup; we might
1934       // still be working on its previous contents). So we do the
1935       // printing here, before we append the new regions to the global
1936       // cleanup list.
1937 
1938       G1HRPrinter* hr_printer = _g1h->hr_printer();
1939       if (hr_printer->is_active()) {
1940         FreeRegionListIterator iter(&local_cleanup_list);
1941         while (iter.more_available()) {
1942           HeapRegion* hr = iter.get_next();
1943           hr_printer->cleanup(hr);
1944         }
1945       }
1946 
1947       _cleanup_list->add_ordered(&local_cleanup_list);
1948       assert(local_cleanup_list.is_empty(), "post-condition");
1949 
1950       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1951     }
1952   }
1953   size_t max_live_bytes() { return _max_live_bytes; }
1954   size_t freed_bytes() { return _freed_bytes; }
1955 };
1956 
1957 class G1ParScrubRemSetTask: public AbstractGangTask {
1958 protected:
1959   G1RemSet* _g1rs;
1960   BitMap* _region_bm;
1961   BitMap* _card_bm;
1962   HeapRegionClaimer _hrclaimer;
1963 
1964 public:
1965   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1966       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1967   }
1968 
1969   void work(uint worker_id) {
1970     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1971   }
1972 
1973 };
1974 
1975 void ConcurrentMark::cleanup() {
1976   // world is stopped at this checkpoint
1977   assert(SafepointSynchronize::is_at_safepoint(),
1978          "world should be stopped");
1979   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1980 
1981   // If a full collection has happened, we shouldn't do this.
1982   if (has_aborted()) {
1983     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1984     return;
1985   }
1986 
1987   g1h->verify_region_sets_optional();
1988 
1989   if (VerifyDuringGC) {
1990     HandleMark hm;  // handle scope
1991     g1h->prepare_for_verify();
1992     Universe::verify(VerifyOption_G1UsePrevMarking,
1993                      " VerifyDuringGC:(before)");
1994   }
1995   g1h->check_bitmaps("Cleanup Start");
1996 
1997   G1CollectorPolicy* g1p = g1h->g1_policy();
1998   g1p->record_concurrent_mark_cleanup_start();
1999 
2000   double start = os::elapsedTime();
2001 
2002   HeapRegionRemSet::reset_for_cleanup_tasks();
2003 
2004   uint n_workers;
2005 
2006   // Do counting once more with the world stopped for good measure.
2007   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2008 
2009   g1h->set_par_threads();
2010   n_workers = g1h->n_par_threads();
2011   assert(g1h->n_par_threads() == n_workers,
2012          "Should not have been reset");
2013   g1h->workers()->run_task(&g1_par_count_task);
2014   // Done with the parallel phase so reset to 0.
2015   g1h->set_par_threads(0);
2016 
2017   if (VerifyDuringGC) {
2018     // Verify that the counting data accumulated during marking matches
2019     // that calculated by walking the marking bitmap.
2020 
2021     // Bitmaps to hold expected values
2022     BitMap expected_region_bm(_region_bm.size(), true);
2023     BitMap expected_card_bm(_card_bm.size(), true);
2024 
2025     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2026                                                  &_region_bm,
2027                                                  &_card_bm,
2028                                                  &expected_region_bm,
2029                                                  &expected_card_bm);
2030 
2031     g1h->set_par_threads((int)n_workers);
2032     g1h->workers()->run_task(&g1_par_verify_task);
2033     // Done with the parallel phase so reset to 0.
2034     g1h->set_par_threads(0);
2035 
2036     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2037   }
2038 
2039   size_t start_used_bytes = g1h->used();
2040   g1h->set_marking_complete();
2041 
2042   double count_end = os::elapsedTime();
2043   double this_final_counting_time = (count_end - start);
2044   _total_counting_time += this_final_counting_time;
2045 
2046   if (G1PrintRegionLivenessInfo) {
2047     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2048     _g1h->heap_region_iterate(&cl);
2049   }
2050 
2051   // Install the newly created mark bitmap as "prev".
2052   swapMarkBitMaps();
2053 
2054   g1h->reset_gc_time_stamp();
2055 
2056   // Note end of marking in all heap regions.
2057   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
2058   g1h->set_par_threads((int)n_workers);
2059   g1h->workers()->run_task(&g1_par_note_end_task);
2060   g1h->set_par_threads(0);
2061   g1h->check_gc_time_stamps();
2062 
2063   if (!cleanup_list_is_empty()) {
2064     // The cleanup list is not empty, so we'll have to process it
2065     // concurrently. Notify anyone else that might be wanting free
2066     // regions that there will be more free regions coming soon.
2067     g1h->set_free_regions_coming();
2068   }
2069 
2070   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
2071   // call below, since it affects the metric by which we sort the heap regions.
2072   if (G1ScrubRemSets) {
2073     double rs_scrub_start = os::elapsedTime();
2074     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
2075     g1h->set_par_threads((int)n_workers);
2076     g1h->workers()->run_task(&g1_par_scrub_rs_task);
2077     g1h->set_par_threads(0);
2078 
2079     double rs_scrub_end = os::elapsedTime();
2080     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2081     _total_rs_scrub_time += this_rs_scrub_time;
2082   }
2083 
2084   // this will also free any regions totally full of garbage objects,
2085   // and sort the regions.
2086   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2087 
2088   // Statistics.
2089   double end = os::elapsedTime();
2090   _cleanup_times.add((end - start) * 1000.0);
2091 
2092   if (G1Log::fine()) {
2093     g1h->g1_policy()->print_heap_transition(start_used_bytes);
2094   }
2095 
2096   // Clean up will have freed any regions completely full of garbage.
2097   // Update the soft reference policy with the new heap occupancy.
2098   Universe::update_heap_info_at_gc();
2099 
2100   if (VerifyDuringGC) {
2101     HandleMark hm;  // handle scope
2102     g1h->prepare_for_verify();
2103     Universe::verify(VerifyOption_G1UsePrevMarking,
2104                      " VerifyDuringGC:(after)");
2105   }
2106 
2107   g1h->check_bitmaps("Cleanup End");
2108 
2109   g1h->verify_region_sets_optional();
2110 
2111   // We need to make this be a "collection" so any collection pause that
2112   // races with it goes around and waits for completeCleanup to finish.
2113   g1h->increment_total_collections();
2114 
2115   // Clean out dead classes and update Metaspace sizes.
2116   if (ClassUnloadingWithConcurrentMark) {
2117     ClassLoaderDataGraph::purge();
2118   }
2119   MetaspaceGC::compute_new_size();
2120 
2121   // We reclaimed old regions so we should calculate the sizes to make
2122   // sure we update the old gen/space data.
2123   g1h->g1mm()->update_sizes();
2124   g1h->allocation_context_stats().update_after_mark();
2125 
2126   g1h->trace_heap_after_concurrent_cycle();
2127 }
2128 
2129 void ConcurrentMark::completeCleanup() {
2130   if (has_aborted()) return;
2131 
2132   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2133 
2134   _cleanup_list.verify_optional();
2135   FreeRegionList tmp_free_list("Tmp Free List");
2136 
2137   if (G1ConcRegionFreeingVerbose) {
2138     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2139                            "cleanup list has %u entries",
2140                            _cleanup_list.length());
2141   }
2142 
2143   // No one else should be accessing the _cleanup_list at this point,
2144   // so it is not necessary to take any locks
2145   while (!_cleanup_list.is_empty()) {
2146     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2147     assert(hr != NULL, "Got NULL from a non-empty list");
2148     hr->par_clear();
2149     tmp_free_list.add_ordered(hr);
2150 
2151     // Instead of adding one region at a time to the secondary_free_list,
2152     // we accumulate them in the local list and move them a few at a
2153     // time. This also cuts down on the number of notify_all() calls
2154     // we do during this process. We'll also append the local list when
2155     // _cleanup_list is empty (which means we just removed the last
2156     // region from the _cleanup_list).
2157     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2158         _cleanup_list.is_empty()) {
2159       if (G1ConcRegionFreeingVerbose) {
2160         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2161                                "appending %u entries to the secondary_free_list, "
2162                                "cleanup list still has %u entries",
2163                                tmp_free_list.length(),
2164                                _cleanup_list.length());
2165       }
2166 
2167       {
2168         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2169         g1h->secondary_free_list_add(&tmp_free_list);
2170         SecondaryFreeList_lock->notify_all();
2171       }
2172 #ifndef PRODUCT
2173       if (G1StressConcRegionFreeing) {
2174         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2175           os::sleep(Thread::current(), (jlong) 1, false);
2176         }
2177       }
2178 #endif
2179     }
2180   }
2181   assert(tmp_free_list.is_empty(), "post-condition");
2182 }
2183 
2184 // Supporting Object and Oop closures for reference discovery
2185 // and processing during marking
2186 
2187 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2188   HeapWord* addr = (HeapWord*)obj;
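       // An object is considered alive if it lies outside the G1 reserved
       // heap (such objects are not covered by this marking) or if it is
       // not "ill", i.e. not known to be dead with respect to the
       // in-progress (next) marking information.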
2189   return addr != NULL &&
2190          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2191 }
2192 
2193 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2194 // Uses the CMTask associated with a worker thread (for serial reference
2195 // processing the CMTask for worker 0 is used) to preserve (mark) and
2196 // trace referent objects.
2197 //
2198 // Using the CMTask and embedded local queues avoids having the worker
2199 // threads operating on the global mark stack. This reduces the risk
2200 // of overflowing the stack - which we would rather avoid at this late
2201 // stage. Also using the tasks' local queues removes the potential
2202 // for the workers to interfere with each other, which could occur if
2203 // they operated on the global stack.
2204 
2205 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2206   ConcurrentMark* _cm;
2207   CMTask*         _task;
2208   int             _ref_counter_limit;
2209   int             _ref_counter;
2210   bool            _is_serial;
2211  public:
2212   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2213     _cm(cm), _task(task), _is_serial(is_serial),
2214     _ref_counter_limit(G1RefProcDrainInterval) {
2215     assert(_ref_counter_limit > 0, "sanity");
2216     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2217     _ref_counter = _ref_counter_limit;
2218   }
2219 
2220   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2221   virtual void do_oop(      oop* p) { do_oop_work(p); }
2222 
2223   template <class T> void do_oop_work(T* p) {
2224     if (!_cm->has_overflown()) {
2225       oop obj = oopDesc::load_decode_heap_oop(p);
2226       if (_cm->verbose_high()) {
2227         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2228                                "*"PTR_FORMAT" = "PTR_FORMAT,
2229                                _task->worker_id(), p2i(p), p2i((void*) obj));
2230       }
2231 
2232       _task->deal_with_reference(obj);
2233       _ref_counter--;
2234 
2235       if (_ref_counter == 0) {
2236         // We have dealt with _ref_counter_limit references, pushing them
2237         // and objects reachable from them on to the local stack (and
2238         // possibly the global stack). Call CMTask::do_marking_step() to
2239         // process these entries.
2240         //
2241         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2242         // there's nothing more to do (i.e. we're done with the entries that
2243         // were pushed as a result of the CMTask::deal_with_reference() calls
2244         // above) or we overflow.
2245         //
2246         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2247         // flag while there may still be some work to do. (See the comment at
2248         // the beginning of CMTask::do_marking_step() for those conditions -
2249         // one of which is reaching the specified time target.) It is only
2250         // when CMTask::do_marking_step() returns without setting the
2251         // has_aborted() flag that the marking step has completed.
2252         do {
2253           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2254           _task->do_marking_step(mark_step_duration_ms,
2255                                  false      /* do_termination */,
2256                                  _is_serial);
2257         } while (_task->has_aborted() && !_cm->has_overflown());
2258         _ref_counter = _ref_counter_limit;
2259       }
2260     } else {
2261       if (_cm->verbose_high()) {
2262          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2263       }
2264     }
2265   }
2266 };
2267 
2268 // 'Drain' oop closure used by both serial and parallel reference processing.
2269 // Uses the CMTask associated with a given worker thread (for serial
2270 // reference processing the CMTask for worker 0 is used). Calls the
2271 // do_marking_step routine, with an unbelievably large timeout value,
2272 // to drain the marking data structures of the remaining entries
2273 // added by the 'keep alive' oop closure above.
2274 
2275 class G1CMDrainMarkingStackClosure: public VoidClosure {
2276   ConcurrentMark* _cm;
2277   CMTask*         _task;
2278   bool            _is_serial;
2279  public:
2280   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2281     _cm(cm), _task(task), _is_serial(is_serial) {
2282     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2283   }
2284 
2285   void do_void() {
2286     do {
2287       if (_cm->verbose_high()) {
2288         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2289                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2290       }
2291 
2292       // We call CMTask::do_marking_step() to completely drain the local
2293       // and global marking stacks of entries pushed by the 'keep alive'
2294       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2295       //
2296       // CMTask::do_marking_step() is called in a loop, which we'll exit
2297       // if there's nothing more to do (i.e. we've completely drained the
2298       // entries that were pushed as a result of applying the 'keep alive'
2299       // closure to the entries on the discovered ref lists) or we overflow
2300       // the global marking stack.
2301       //
2302       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2303       // flag while there may still be some work to do. (See the comment at
2304       // the beginning of CMTask::do_marking_step() for those conditions -
2305       // one of which is reaching the specified time target.) It is only
2306       // when CMTask::do_marking_step() returns without setting the
2307       // has_aborted() flag that the marking step has completed.
2308 
2309       _task->do_marking_step(1000000000.0 /* something very large */,
2310                              true         /* do_termination */,
2311                              _is_serial);
2312     } while (_task->has_aborted() && !_cm->has_overflown());
2313   }
2314 };
2315 
2316 // Implementation of AbstractRefProcTaskExecutor for parallel
2317 // reference processing at the end of G1 concurrent marking
2318 
2319 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2320 private:
2321   G1CollectedHeap* _g1h;
2322   ConcurrentMark*  _cm;
2323   WorkGang*        _workers;
2324   int              _active_workers;
2325 
2326 public:
2327   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2328                         ConcurrentMark* cm,
2329                         WorkGang* workers,
2330                         int n_workers) :
2331     _g1h(g1h), _cm(cm),
2332     _workers(workers), _active_workers(n_workers) { }
2333 
2334   // Executes the given task using concurrent marking worker threads.
2335   virtual void execute(ProcessTask& task);
2336   virtual void execute(EnqueueTask& task);
2337 };
2338 
2339 class G1CMRefProcTaskProxy: public AbstractGangTask {
2340   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2341   ProcessTask&     _proc_task;
2342   G1CollectedHeap* _g1h;
2343   ConcurrentMark*  _cm;
2344 
2345 public:
2346   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2347                      G1CollectedHeap* g1h,
2348                      ConcurrentMark* cm) :
2349     AbstractGangTask("Process reference objects in parallel"),
2350     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2351     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2352     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2353   }
2354 
2355   virtual void work(uint worker_id) {
2356     ResourceMark rm;
2357     HandleMark hm;
2358     CMTask* task = _cm->task(worker_id);
2359     G1CMIsAliveClosure g1_is_alive(_g1h);
2360     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2361     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2362 
2363     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2364   }
2365 };
2366 
2367 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2368   assert(_workers != NULL, "Need parallel worker threads.");
2369   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2370 
2371   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2372 
2373   // We need to reset the concurrency level before each
2374   // proxy task execution, so that the termination protocol
2375   // and overflow handling in CMTask::do_marking_step() know
2376   // how many workers to wait for.
2377   _cm->set_concurrency(_active_workers);
2378   _g1h->set_par_threads(_active_workers);
2379   _workers->run_task(&proc_task_proxy);
2380   _g1h->set_par_threads(0);
2381 }
2382 
2383 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2384   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2385   EnqueueTask& _enq_task;
2386 
2387 public:
2388   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2389     AbstractGangTask("Enqueue reference objects in parallel"),
2390     _enq_task(enq_task) { }
2391 
2392   virtual void work(uint worker_id) {
2393     _enq_task.work(worker_id);
2394   }
2395 };
2396 
2397 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2398   assert(_workers != NULL, "Need parallel worker threads.");
2399   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2400 
2401   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2402 
2403   // Not strictly necessary but...
2404   //
2405   // We need to reset the concurrency level before each
2406   // proxy task execution, so that the termination protocol
2407   // and overflow handling in CMTask::do_marking_step() know
2408   // how many workers to wait for.
2409   _cm->set_concurrency(_active_workers);
2410   _g1h->set_par_threads(_active_workers);
2411   _workers->run_task(&enq_task_proxy);
2412   _g1h->set_par_threads(0);
2413 }
2414 
2415 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2416   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2417 }
2418 
2419 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2420   if (has_overflown()) {
2421     // Skip processing the discovered references if we have
2422     // overflown the global marking stack. Reference objects
2423     // only get discovered once so it is OK to not
2424     // de-populate the discovered reference lists. We could have,
2425     // but the only benefit would be that, when marking restarts,
2426     // less reference objects are discovered.
2427     return;
2428   }
2429 
2430   ResourceMark rm;
2431   HandleMark   hm;
2432 
2433   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2434 
2435   // Is alive closure.
2436   G1CMIsAliveClosure g1_is_alive(g1h);
2437 
2438   // Inner scope to exclude the cleaning of the string and symbol
2439   // tables from the displayed time.
2440   {
2441     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2442 
2443     ReferenceProcessor* rp = g1h->ref_processor_cm();
2444 
2445     // See the comment in G1CollectedHeap::ref_processing_init()
2446     // about how reference processing currently works in G1.
2447 
2448     // Set the soft reference policy
2449     rp->setup_policy(clear_all_soft_refs);
2450     assert(_markStack.isEmpty(), "mark stack should be empty");
2451 
2452     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2453     // in serial reference processing. Note these closures are also
2454     // used for serially processing (by the current thread) the
2455     // JNI references during parallel reference processing.
2456     //
2457     // These closures do not need to synchronize with the worker
2458     // threads involved in parallel reference processing as these
2459     // instances are executed serially by the current thread (e.g.
2460     // reference processing is not multi-threaded and is thus
2461     // performed by the current thread instead of a gang worker).
2462     //
2463     // The gang tasks involved in parallel reference processing create
2464     // their own instances of these closures, which do their own
2465     // synchronization among themselves.
2466     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2467     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2468 
2469     // We need at least one active thread. If reference processing
2470     // is not multi-threaded we use the current (VMThread) thread,
2471     // otherwise we use the work gang from the G1CollectedHeap and
2472     // we utilize all the worker threads we can.
2473     bool processing_is_mt = rp->processing_is_mt();
2474     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
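         // Clamp to the range [1, _max_worker_id]: we need at least one
         // worker, and CMTask instances only exist for worker ids up to
         // _max_worker_id.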
2475     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2476 
2477     // Parallel processing task executor.
2478     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2479                                               g1h->workers(), active_workers);
2480     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2481 
2482     // Set the concurrency level. The phase was already set prior to
2483     // executing the remark task.
2484     set_concurrency(active_workers);
2485 
2486     // Set the degree of MT processing here.  If the discovery was done MT,
2487     // the number of threads involved during discovery could differ from
2488     // the number of active workers.  This is OK as long as the discovered
2489     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2490     rp->set_active_mt_degree(active_workers);
2491 
2492     // Process the weak references.
2493     const ReferenceProcessorStats& stats =
2494         rp->process_discovered_references(&g1_is_alive,
2495                                           &g1_keep_alive,
2496                                           &g1_drain_mark_stack,
2497                                           executor,
2498                                           g1h->gc_timer_cm(),
2499                                           concurrent_gc_id());
2500     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2501 
2502     // The do_oop work routines of the keep_alive and drain_marking_stack
2503     // oop closures will set the has_overflown flag if we overflow the
2504     // global marking stack.
2505 
2506     assert(_markStack.overflow() || _markStack.isEmpty(),
2507             "mark stack should be empty (unless it overflowed)");
2508 
2509     if (_markStack.overflow()) {
2510       // This should have been done already when we tried to push an
2511       // entry on to the global mark stack. But let's do it again.
2512       set_has_overflown();
2513     }
2514 
2515     assert(rp->num_q() == active_workers, "why not");
2516 
2517     rp->enqueue_discovered_references(executor);
2518 
2519     rp->verify_no_references_recorded();
2520     assert(!rp->discovery_enabled(), "Post condition");
2521   }
2522 
2523   if (has_overflown()) {
2524     // We cannot trust g1_is_alive if the marking stack overflowed
2525     return;
2526   }
2527 
2528   assert(_markStack.isEmpty(), "Marking should have completed");
2529 
2530   // Unload Klasses, String, Symbols, Code Cache, etc.
2531   {
2532     G1CMTraceTime trace("Unloading", G1Log::finer());
2533 
2534     if (ClassUnloadingWithConcurrentMark) {
2535       bool purged_classes;
2536 
2537       {
2538         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2539         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2540       }
2541 
2542       {
2543         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2544         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2545       }
2546     }
2547 
2548     if (G1StringDedup::is_enabled()) {
2549       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2550       G1StringDedup::unlink(&g1_is_alive);
2551     }
2552   }
2553 }
2554 
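     // After a completed marking cycle the "next" bitmap holds the up-to-date
     // liveness information, so it becomes the new "prev" bitmap; the old
     // "prev" bitmap will be cleared and reused as "next" for the next cycle.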
2555 void ConcurrentMark::swapMarkBitMaps() {
2556   CMBitMapRO* temp = _prevMarkBitMap;
2557   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2558   _nextMarkBitMap  = (CMBitMap*)  temp;
2559 }
2560 
2561 class CMObjectClosure;
2562 
2563 // Closure for iterating over objects, currently only used for
2564 // processing SATB buffers.
2565 class CMObjectClosure : public ObjectClosure {
2566 private:
2567   CMTask* _task;
2568 
2569 public:
2570   void do_object(oop obj) {
2571     _task->deal_with_reference(obj);
2572   }
2573 
2574   CMObjectClosure(CMTask* task) : _task(task) { }
2575 };
2576 
2577 class G1RemarkThreadsClosure : public ThreadClosure {
2578   CMObjectClosure _cm_obj;
2579   G1CMOopClosure _cm_cl;
2580   MarkingCodeBlobClosure _code_cl;
2581   int _thread_parity;
2582 
2583  public:
2584   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2585     _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2586     _thread_parity(Threads::thread_claim_parity()) {}
2587 
2588   void do_thread(Thread* thread) {
2589     if (thread->is_Java_thread()) {
2590       if (thread->claim_oops_do(true, _thread_parity)) {
2591         JavaThread* jt = (JavaThread*)thread;
2592 
2593         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking,
2594         // however oops reachable from nmethods have very complex lifecycles:
2595         // * Alive if on the stack of an executing method
2596         // * Weakly reachable otherwise
2597         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2598         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2599         jt->nmethods_do(&_code_cl);
2600 
2601         jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
2602       }
2603     } else if (thread->is_VM_thread()) {
2604       if (thread->claim_oops_do(true, _thread_parity)) {
2605         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
2606       }
2607     }
2608   }
2609 };
2610 
2611 class CMRemarkTask: public AbstractGangTask {
2612 private:
2613   ConcurrentMark* _cm;
2614 public:
2615   void work(uint worker_id) {
2616     // Since all available tasks are actually started, we should
2617     // only proceed if we're supposed to be active.
2618     if (worker_id < _cm->active_tasks()) {
2619       CMTask* task = _cm->task(worker_id);
2620       task->record_start_time();
2621       {
2622         ResourceMark rm;
2623         HandleMark hm;
2624 
2625         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2626         Threads::threads_do(&threads_f);
2627       }
2628 
2629       do {
2630         task->do_marking_step(1000000000.0 /* something very large */,
2631                               true         /* do_termination       */,
2632                               false        /* is_serial            */);
2633       } while (task->has_aborted() && !_cm->has_overflown());
2634       // If we overflow, then we do not want to restart. We instead
2635       // want to abort remark and do concurrent marking again.
2636       task->record_end_time();
2637     }
2638   }
2639 
2640   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2641     AbstractGangTask("Par Remark"), _cm(cm) {
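         // The terminator may have been set up for a different number of
         // workers during concurrent marking; resize it for the number of
         // workers that will be active in this remark pause.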
2642     _cm->terminator()->reset_for_reuse(active_workers);
2643   }
2644 };
2645 
2646 void ConcurrentMark::checkpointRootsFinalWork() {
2647   ResourceMark rm;
2648   HandleMark   hm;
2649   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2650 
2651   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2652 
2653   g1h->ensure_parsability(false);
2654 
2655   StrongRootsScope srs;
2656   // this is remark, so we'll use up all active threads
2657   uint active_workers = g1h->workers()->active_workers();
2658   if (active_workers == 0) {
2659     assert(active_workers > 0, "Should have been set earlier");
2660     active_workers = (uint) ParallelGCThreads;
2661     g1h->workers()->set_active_workers(active_workers);
2662   }
2663   set_concurrency_and_phase(active_workers, false /* concurrent */);
2664   // Leave _parallel_marking_threads at its
2665   // value originally calculated in the ConcurrentMark
2666   // constructor and pass the number of active workers
2667   // through the gang and into the task.
2668 
2669   CMRemarkTask remarkTask(this, active_workers);
2670   // We will start all available threads, even if we decide that the
2671   // active_workers will be fewer. The extra ones will just bail out
2672   // immediately.
2673   g1h->set_par_threads(active_workers);
2674   g1h->workers()->run_task(&remarkTask);
2675   g1h->set_par_threads(0);
2676 
2677   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
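       // At this point either marking overflowed (and will be restarted), or
       // all completed SATB buffers must have been processed by the remark
       // tasks.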
2678   guarantee(has_overflown() ||
2679             satb_mq_set.completed_buffers_num() == 0,
2680             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2681                     BOOL_TO_STR(has_overflown()),
2682                     satb_mq_set.completed_buffers_num()));
2683 
2684   print_stats();
2685 }
2686 
2687 #ifndef PRODUCT
2688 
2689 class PrintReachableOopClosure: public OopClosure {
2690 private:
2691   G1CollectedHeap* _g1h;
2692   outputStream*    _out;
2693   VerifyOption     _vo;
2694   bool             _all;
2695 
2696 public:
2697   PrintReachableOopClosure(outputStream* out,
2698                            VerifyOption  vo,
2699                            bool          all) :
2700     _g1h(G1CollectedHeap::heap()),
2701     _out(out), _vo(vo), _all(all) { }
2702 
2703   void do_oop(narrowOop* p) { do_oop_work(p); }
2704   void do_oop(      oop* p) { do_oop_work(p); }
2705 
2706   template <class T> void do_oop_work(T* p) {
2707     oop         obj = oopDesc::load_decode_heap_oop(p);
2708     const char* str = NULL;
2709     const char* str2 = "";
2710 
2711     if (obj == NULL) {
2712       str = "";
2713     } else if (!_g1h->is_in_g1_reserved(obj)) {
2714       str = " O";
2715     } else {
2716       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2717       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2718       bool marked = _g1h->is_marked(obj, _vo);
2719 
2720       if (over_tams) {
2721         str = " >";
2722         if (marked) {
2723           str2 = " AND MARKED";
2724         }
2725       } else if (marked) {
2726         str = " M";
2727       } else {
2728         str = " NOT";
2729       }
2730     }
2731 
2732     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2733                    p2i(p), p2i((void*) obj), str, str2);
2734   }
2735 };
2736 
2737 class PrintReachableObjectClosure : public ObjectClosure {
2738 private:
2739   G1CollectedHeap* _g1h;
2740   outputStream*    _out;
2741   VerifyOption     _vo;
2742   bool             _all;
2743   HeapRegion*      _hr;
2744 
2745 public:
2746   PrintReachableObjectClosure(outputStream* out,
2747                               VerifyOption  vo,
2748                               bool          all,
2749                               HeapRegion*   hr) :
2750     _g1h(G1CollectedHeap::heap()),
2751     _out(out), _vo(vo), _all(all), _hr(hr) { }
2752 
2753   void do_object(oop o) {
2754     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2755     bool marked = _g1h->is_marked(o, _vo);
2756     bool print_it = _all || over_tams || marked;
2757 
2758     if (print_it) {
2759       _out->print_cr(" "PTR_FORMAT"%s",
2760                      p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2761       PrintReachableOopClosure oopCl(_out, _vo, _all);
2762       o->oop_iterate_no_header(&oopCl);
2763     }
2764   }
2765 };
2766 
2767 class PrintReachableRegionClosure : public HeapRegionClosure {
2768 private:
2769   G1CollectedHeap* _g1h;
2770   outputStream*    _out;
2771   VerifyOption     _vo;
2772   bool             _all;
2773 
2774 public:
2775   bool doHeapRegion(HeapRegion* hr) {
2776     HeapWord* b = hr->bottom();
2777     HeapWord* e = hr->end();
2778     HeapWord* t = hr->top();
2779     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2780     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2781                    "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2782     _out->cr();
2783 
2784     HeapWord* from = b;
2785     HeapWord* to   = t;
2786 
2787     if (to > from) {
2788       _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2789       _out->cr();
2790       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2791       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2792       _out->cr();
2793     }
2794 
2795     return false;
2796   }
2797 
2798   PrintReachableRegionClosure(outputStream* out,
2799                               VerifyOption  vo,
2800                               bool          all) :
2801     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2802 };
2803 
2804 void ConcurrentMark::print_reachable(const char* str,
2805                                      VerifyOption vo,
2806                                      bool all) {
2807   gclog_or_tty->cr();
2808   gclog_or_tty->print_cr("== Doing heap dump... ");
2809 
2810   if (G1PrintReachableBaseFile == NULL) {
2811     gclog_or_tty->print_cr("  #### error: no base file defined");
2812     return;
2813   }
2814 
2815   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2816       (JVM_MAXPATHLEN - 1)) {
2817     gclog_or_tty->print_cr("  #### error: file name too long");
2818     return;
2819   }
2820 
2821   char file_name[JVM_MAXPATHLEN];
2822   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2823   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2824 
2825   fileStream fout(file_name);
2826   if (!fout.is_open()) {
2827     gclog_or_tty->print_cr("  #### error: could not open file");
2828     return;
2829   }
2830 
2831   outputStream* out = &fout;
2832   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2833   out->cr();
2834 
2835   out->print_cr("--- ITERATING OVER REGIONS");
2836   out->cr();
2837   PrintReachableRegionClosure rcl(out, vo, all);
2838   _g1h->heap_region_iterate(&rcl);
2839   out->cr();
2840 
2841   gclog_or_tty->print_cr("  done");
2842   gclog_or_tty->flush();
2843 }
2844 
2845 #endif // PRODUCT
2846 
2847 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2848   // Note we are overriding the read-only view of the prev map here, via
2849   // the cast.
2850   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2851 }
2852 
2853 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2854   _nextMarkBitMap->clearRange(mr);
2855 }
2856 
2857 HeapRegion*
2858 ConcurrentMark::claim_region(uint worker_id) {
2859   // "checkpoint" the finger
2860   HeapWord* finger = _finger;
2861 
2862   // _heap_end will not change underneath our feet; it only changes at
2863   // yield points.
2864   while (finger < _heap_end) {
2865     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2866 
2867     // Note on how this code handles humongous regions. In the
2868     // normal case the finger will reach the start of a "starts
2869     // humongous" (SH) region. Its end will either be the end of the
2870     // last "continues humongous" (CH) region in the sequence, or the
2871     // standard end of the SH region (if the SH is the only region in
2872     // the sequence). That way claim_region() will skip over the CH
2873     // regions. However, there is a subtle race between a CM thread
2874     // executing this method and a mutator thread doing a humongous
2875     // object allocation. The two are not mutually exclusive as the CM
2876     // thread does not need to hold the Heap_lock when it gets
2877     // here. So there is a chance that claim_region() will come across
2878     // a free region that's in the process of becoming a SH or a CH
2879     // region. In the former case, it will either
2880     //   a) Miss the update to the region's end, in which case it will
2881     //      visit every subsequent CH region, will find their bitmaps
2882     //      empty, and do nothing, or
2883     //   b) Will observe the update of the region's end (in which case
2884     //      it will skip the subsequent CH regions).
2885     // If it comes across a region that suddenly becomes CH, the
2886     // scenario will be similar to b). So, the race between
2887     // claim_region() and a humongous object allocation might force us
2888     // to do a bit of unnecessary work (due to some unnecessary bitmap
2889     // iterations) but it should not introduce any correctness issues.
2890     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2891 
2892     // heap_region_containing_raw() above may return NULL, since we always scan
2893     // and claim up to the end of the heap. In that case, just jump to the next region.
2894     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2895 
2896     // Is the gap between reading the finger and doing the CAS too long?
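         // Attempt to atomically move the global finger from 'finger' to 'end'.
         // Only one thread can win the CAS; the winner has claimed the range
         // [finger, end) (provided curr_region is non-NULL), while everyone
         // else (and a winner that hit an uncommitted region) re-reads the
         // finger below and retries.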
2897     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2898     if (res == finger && curr_region != NULL) {
2899       // we succeeded
2900       HeapWord*   bottom        = curr_region->bottom();
2901       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2902 
2903       if (verbose_low()) {
2904         gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2905                                "["PTR_FORMAT", "PTR_FORMAT"), "
2906                                "limit = "PTR_FORMAT,
2907                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2908       }
2909 
2910       // notice that _finger == end cannot be guaranteed here since
2911       // someone else might have moved the finger even further
2912       assert(_finger >= end, "the finger should have moved forward");
2913 
2914       if (verbose_low()) {
2915         gclog_or_tty->print_cr("[%u] we were successful with region = "
2916                                PTR_FORMAT, worker_id, p2i(curr_region));
2917       }
2918 
2919       if (limit > bottom) {
2920         if (verbose_low()) {
2921           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2922                                  "returning it ", worker_id, p2i(curr_region));
2923         }
2924         return curr_region;
2925       } else {
2926         assert(limit == bottom,
2927                "the region limit should be at bottom");
2928         if (verbose_low()) {
2929           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2930                                  "returning NULL", worker_id, p2i(curr_region));
2931         }
2932         // we return NULL and the caller should try calling
2933         // claim_region() again.
2934         return NULL;
2935       }
2936     } else {
2937       assert(_finger > finger, "the finger should have moved forward");
2938       if (verbose_low()) {
2939         if (curr_region == NULL) {
2940           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2941                                  "global finger = "PTR_FORMAT", "
2942                                  "our finger = "PTR_FORMAT,
2943                                  worker_id, p2i(_finger), p2i(finger));
2944         } else {
2945           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2946                                  "global finger = "PTR_FORMAT", "
2947                                  "our finger = "PTR_FORMAT,
2948                                  worker_id, p2i(_finger), p2i(finger));
2949         }
2950       }
2951 
2952       // read it again
2953       finger = _finger;
2954     }
2955   }
2956 
2957   return NULL;
2958 }
2959 
2960 #ifndef PRODUCT
2961 enum VerifyNoCSetOopsPhase {
2962   VerifyNoCSetOopsStack,
2963   VerifyNoCSetOopsQueues,
2964   VerifyNoCSetOopsSATBCompleted,
2965   VerifyNoCSetOopsSATBThread
2966 };
2967 
2968 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2969 private:
2970   G1CollectedHeap* _g1h;
2971   VerifyNoCSetOopsPhase _phase;
2972   int _info;
2973 
2974   const char* phase_str() {
2975     switch (_phase) {
2976     case VerifyNoCSetOopsStack:         return "Stack";
2977     case VerifyNoCSetOopsQueues:        return "Queue";
2978     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2979     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2980     default:                            ShouldNotReachHere();
2981     }
2982     return NULL;
2983   }
2984 
2985   void do_object_work(oop obj) {
2986     guarantee(!_g1h->obj_in_cs(obj),
2987               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2988                       p2i((void*) obj), phase_str(), _info));
2989   }
2990 
2991 public:
2992   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2993 
2994   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2995     _phase = phase;
2996     _info = info;
2997   }
2998 
2999   virtual void do_oop(oop* p) {
3000     oop obj = oopDesc::load_decode_heap_oop(p);
3001     do_object_work(obj);
3002   }
3003 
3004   virtual void do_oop(narrowOop* p) {
3005     // We should not come across narrow oops while scanning marking
3006     // stacks and SATB buffers.
3007     ShouldNotReachHere();
3008   }
3009 
3010   virtual void do_object(oop obj) {
3011     do_object_work(obj);
3012   }
3013 };
3014 
3015 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
3016                                          bool verify_enqueued_buffers,
3017                                          bool verify_thread_buffers,
3018                                          bool verify_fingers) {
3019   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
3020   if (!G1CollectedHeap::heap()->mark_in_progress()) {
3021     return;
3022   }
3023 
3024   VerifyNoCSetOopsClosure cl;
3025 
3026   if (verify_stacks) {
3027     // Verify entries on the global mark stack
3028     cl.set_phase(VerifyNoCSetOopsStack);
3029     _markStack.oops_do(&cl);
3030 
3031     // Verify entries on the task queues
3032     for (uint i = 0; i < _max_worker_id; i += 1) {
3033       cl.set_phase(VerifyNoCSetOopsQueues, i);
3034       CMTaskQueue* queue = _task_queues->queue(i);
3035       queue->oops_do(&cl);
3036     }
3037   }
3038 
3039   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
3040 
3041   // Verify entries on the enqueued SATB buffers
3042   if (verify_enqueued_buffers) {
3043     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3044     satb_qs.iterate_completed_buffers_read_only(&cl);
3045   }
3046 
3047   // Verify entries on the per-thread SATB buffers
3048   if (verify_thread_buffers) {
3049     cl.set_phase(VerifyNoCSetOopsSATBThread);
3050     satb_qs.iterate_thread_buffers_read_only(&cl);
3051   }
3052 
3053   if (verify_fingers) {
3054     // Verify the global finger
3055     HeapWord* global_finger = finger();
3056     if (global_finger != NULL && global_finger < _heap_end) {
3057       // The global finger always points to a heap region boundary. We
3058       // use heap_region_containing_raw() to get the containing region
3059       // given that the global finger could be pointing to a free region
3060       // which subsequently becomes continues humongous. If that
3061       // happens, heap_region_containing() will return the bottom of the
3062       // corresponding starts humongous region and the check below will
3063       // not hold any more.
3064       // Since we always iterate over all regions, we might get a NULL HeapRegion
3065       // here.
3066       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3067       guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
3068                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3069                         p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3070     }
3071 
3072     // Verify the task fingers
3073     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3074     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3075       CMTask* task = _tasks[i];
3076       HeapWord* task_finger = task->finger();
3077       if (task_finger != NULL && task_finger < _heap_end) {
3078         // See above note on the global finger verification.
3079         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3080         guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
3081                   !task_hr->in_collection_set(),
3082                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3083                           p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3084       }
3085     }
3086   }
3087 }
3088 #endif // PRODUCT
3089 
3090 // Aggregate the counting data that was constructed concurrently
3091 // with marking.
3092 class AggregateCountDataHRClosure: public HeapRegionClosure {
3093   G1CollectedHeap* _g1h;
3094   ConcurrentMark* _cm;
3095   CardTableModRefBS* _ct_bs;
3096   BitMap* _cm_card_bm;
3097   uint _max_worker_id;
3098 
3099  public:
3100   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3101                               BitMap* cm_card_bm,
3102                               uint max_worker_id) :
3103     _g1h(g1h), _cm(g1h->concurrent_mark()),
3104     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
3105     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3106 
3107   bool doHeapRegion(HeapRegion* hr) {
3108     if (hr->is_continues_humongous()) {
3109       // We will ignore these here and process them when their
3110       // associated "starts humongous" region is processed.
3111       // Note that we cannot rely on their associated
3112       // "starts humongous" region to have their bit set to 1
3113       // since, due to the region chunking in the parallel region
3114       // iteration, a "continues humongous" region might be visited
3115       // before its associated "starts humongous".
3116       return false;
3117     }
3118 
3119     HeapWord* start = hr->bottom();
3120     HeapWord* limit = hr->next_top_at_mark_start();
3121     HeapWord* end = hr->end();
3122 
3123     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3124            err_msg("Preconditions not met - "
3125                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3126                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
3127                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3128 
3129     assert(hr->next_marked_bytes() == 0, "Precondition");
3130 
3131     if (start == limit) {
3132       // NTAMS of this region has not been set so nothing to do.
3133       return false;
3134     }
3135 
3136     // 'start' should be in the heap.
3137     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3138     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3139     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3140 
3141     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3142     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3143     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3144 
3145     // If ntams is not card aligned then we bump the card bitmap index
3146     // for limit so that we get all the cards spanned by
3147     // the object ending at ntams.
3148     // Note: if this is the last region in the heap then ntams
3149     // could actually be just beyond the end of the heap;
3150     // limit_idx will then correspond to a (non-existent) card
3151     // that is also outside the heap.
3152     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3153       limit_idx += 1;
3154     }
3155 
3156     assert(limit_idx <= end_idx, "or else use atomics");
3157 
3158     // Aggregate the "stripe" in the count data associated with hr.
3159     uint hrm_index = hr->hrm_index();
3160     size_t marked_bytes = 0;
3161 
3162     for (uint i = 0; i < _max_worker_id; i += 1) {
3163       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3164       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3165 
3166       // Fetch the marked_bytes in this region for task i and
3167       // add it to the running total for this region.
3168       marked_bytes += marked_bytes_array[hrm_index];
3169 
3170       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3171       // into the global card bitmap.
3172       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3173 
3174       while (scan_idx < limit_idx) {
3175         assert(task_card_bm->at(scan_idx) == true, "should be");
3176         _cm_card_bm->set_bit(scan_idx);
3177         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3178 
3179         // BitMap::get_next_one_offset() can handle the case when
3180         // its left_offset parameter is greater than its right_offset
3181         // parameter. It does, however, have an early exit if
3182         // left_offset == right_offset. So let's limit the value
3183         // passed in for left offset here.
3184         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3185         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3186       }
3187     }
3188 
3189     // Update the marked bytes for this region.
3190     hr->add_to_marked_bytes(marked_bytes);
3191 
3192     // Next heap region
3193     return false;
3194   }
3195 };
3196 
3197 class G1AggregateCountDataTask: public AbstractGangTask {
3198 protected:
3199   G1CollectedHeap* _g1h;
3200   ConcurrentMark* _cm;
3201   BitMap* _cm_card_bm;
3202   uint _max_worker_id;
3203   int _active_workers;
3204   HeapRegionClaimer _hrclaimer;
3205 
3206 public:
3207   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3208                            ConcurrentMark* cm,
3209                            BitMap* cm_card_bm,
3210                            uint max_worker_id,
3211                            int n_workers) :
3212       AbstractGangTask("Count Aggregation"),
3213       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3214       _max_worker_id(max_worker_id),
3215       _active_workers(n_workers),
3216       _hrclaimer(_active_workers) {
3217   }
3218 
3219   void work(uint worker_id) {
3220     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3221 
3222     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
3223   }
3224 };
3225 
3226 
3227 void ConcurrentMark::aggregate_count_data() {
3228   int n_workers = _g1h->workers()->active_workers();
3229 
3230   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3231                                            _max_worker_id, n_workers);
3232 
3233   _g1h->set_par_threads(n_workers);
3234   _g1h->workers()->run_task(&g1_par_agg_task);
3235   _g1h->set_par_threads(0);
3236 }
3237 
3238 // Clear the per-worker arrays used to store the per-region counting data
3239 void ConcurrentMark::clear_all_count_data() {
3240   // Clear the global card bitmap - it will be filled during
3241   // liveness count aggregation (during remark) and the
3242   // final counting task.
3243   _card_bm.clear();
3244 
3245   // Clear the global region bitmap - it will be filled as part
3246   // of the final counting task.
3247   _region_bm.clear();
3248 
3249   uint max_regions = _g1h->max_regions();
3250   assert(_max_worker_id > 0, "uninitialized");
3251 
3252   for (uint i = 0; i < _max_worker_id; i += 1) {
3253     BitMap* task_card_bm = count_card_bitmap_for(i);
3254     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3255 
3256     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3257     assert(marked_bytes_array != NULL, "uninitialized");
3258 
3259     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3260     task_card_bm->clear();
3261   }
3262 }
3263 
3264 void ConcurrentMark::print_stats() {
3265   if (verbose_stats()) {
3266     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3267     for (size_t i = 0; i < _active_tasks; ++i) {
3268       _tasks[i]->print_stats();
3269       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3270     }
3271   }
3272 }
3273 
3274 // abandon current marking iteration due to a Full GC
3275 void ConcurrentMark::abort() {
3276   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3277   // concurrent bitmap clearing.
3278   _nextMarkBitMap->clearAll();
3279 
3280   // Note we cannot clear the previous marking bitmap here
3281   // since VerifyDuringGC verifies the objects marked during
3282   // a full GC against the previous bitmap.
3283 
3284   // Clear the liveness counting data
3285   clear_all_count_data();
3286   // Empty mark stack
3287   reset_marking_state();
3288   for (uint i = 0; i < _max_worker_id; ++i) {
3289     _tasks[i]->clear_region_fields();
3290   }
3291   _first_overflow_barrier_sync.abort();
3292   _second_overflow_barrier_sync.abort();
3293   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3294   if (!gc_id.is_undefined()) {
3295     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3296     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3297     _aborted_gc_id = gc_id;
3298   }
3299   _has_aborted = true;
3300 
3301   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3302   satb_mq_set.abandon_partial_marking();
3303   // This can be called either during or outside marking, we'll read
3304   // the expected_active value from the SATB queue set.
3305   satb_mq_set.set_active_all_threads(
3306                                  false, /* new active value */
3307                                  satb_mq_set.is_active() /* expected_active */);
3308 
3309   _g1h->trace_heap_after_concurrent_cycle();
3310   _g1h->register_concurrent_cycle_end();
3311 }
3312 
3313 const GCId& ConcurrentMark::concurrent_gc_id() {
3314   if (has_aborted()) {
3315     return _aborted_gc_id;
3316   }
3317   return _g1h->gc_tracer_cm()->gc_id();
3318 }
3319 
3320 static void print_ms_time_info(const char* prefix, const char* name,
3321                                NumberSeq& ns) {
3322   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3323                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3324   if (ns.num() > 0) {
3325     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3326                            prefix, ns.sd(), ns.maximum());
3327   }
3328 }
3329 
3330 void ConcurrentMark::print_summary_info() {
3331   gclog_or_tty->print_cr(" Concurrent marking:");
3332   print_ms_time_info("  ", "init marks", _init_times);
3333   print_ms_time_info("  ", "remarks", _remark_times);
3334   {
3335     print_ms_time_info("     ", "final marks", _remark_mark_times);
3336     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3337 
3338   }
3339   print_ms_time_info("  ", "cleanups", _cleanup_times);
3340   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3341                          _total_counting_time,
3342                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3343                           (double)_cleanup_times.num()
3344                          : 0.0));
3345   if (G1ScrubRemSets) {
3346     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3347                            _total_rs_scrub_time,
3348                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3349                             (double)_cleanup_times.num()
3350                            : 0.0));
3351   }
3352   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3353                          (_init_times.sum() + _remark_times.sum() +
3354                           _cleanup_times.sum())/1000.0);
3355   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3356                 "(%8.2f s marking).",
3357                 cmThread()->vtime_accum(),
3358                 cmThread()->vtime_mark_accum());
3359 }
3360 
3361 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3362   _parallel_workers->print_worker_threads_on(st);
3363 }
3364 
3365 void ConcurrentMark::print_on_error(outputStream* st) const {
3366   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3367       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3368   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3369   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3370 }
3371 
3372 // We take a break if someone is trying to stop the world.
3373 bool ConcurrentMark::do_yield_check(uint worker_id) {
3374   if (SuspendibleThreadSet::should_yield()) {
3375     if (worker_id == 0) {
3376       _g1h->g1_policy()->record_concurrent_pause();
3377     }
3378     SuspendibleThreadSet::yield();
3379     return true;
3380   } else {
3381     return false;
3382   }
3383 }
3384 
3385 #ifndef PRODUCT
3386 // for debugging purposes
3387 void ConcurrentMark::print_finger() {
3388   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3389                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3390   for (uint i = 0; i < _max_worker_id; ++i) {
3391     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3392   }
3393   gclog_or_tty->cr();
3394 }
3395 #endif
3396 
3397 void CMTask::scan_object(oop obj) {
3398   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3399 
3400   if (_cm->verbose_high()) {
3401     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3402                            _worker_id, p2i((void*) obj));
3403   }
3404 
3405   size_t obj_size = obj->size();
3406   _words_scanned += obj_size;
3407 
3408   obj->oop_iterate(_cm_oop_closure);
3409   statsOnly( ++_objs_scanned );
3410   check_limits();
3411 }
3412 
3413 // Closure for iteration over bitmaps
3414 class CMBitMapClosure : public BitMapClosure {
3415 private:
3416   // the bitmap that is being iterated over
3417   CMBitMap*                   _nextMarkBitMap;
3418   ConcurrentMark*             _cm;
3419   CMTask*                     _task;
3420 
3421 public:
3422   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3423     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3424 
3425   bool do_bit(size_t offset) {
3426     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3427     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3428     assert( addr < _cm->finger(), "invariant");
3429 
3430     statsOnly( _task->increase_objs_found_on_bitmap() );
3431     assert(addr >= _task->finger(), "invariant");
3432 
3433     // We move that task's local finger along.
3434     _task->move_finger_to(addr);
3435 
3436     _task->scan_object(oop(addr));
3437     // we only partially drain the local queue and global stack
3438     _task->drain_local_queue(true);
3439     _task->drain_global_stack(true);
3440 
3441     // if the has_aborted flag has been raised, we need to bail out of
3442     // the iteration
3443     return !_task->has_aborted();
3444   }
3445 };
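     // This closure is applied below in CMTask::do_marking_step() to the part
     // [_finger, _region_limit) of the claimed region's next mark bitmap;
     // returning false from do_bit() terminates that iteration early.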
3446 
3447 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3448                                ConcurrentMark* cm,
3449                                CMTask* task)
3450   : _g1h(g1h), _cm(cm), _task(task) {
3451   assert(_ref_processor == NULL, "should be initialized to NULL");
3452 
3453   if (G1UseConcMarkReferenceProcessing) {
3454     _ref_processor = g1h->ref_processor_cm();
3455     assert(_ref_processor != NULL, "should not be NULL");
3456   }
3457 }
3458 
3459 void CMTask::setup_for_region(HeapRegion* hr) {
3460   assert(hr != NULL,
3461         "claim_region() should have filtered out NULL regions");
3462   assert(!hr->is_continues_humongous(),
3463         "claim_region() should have filtered out continues humongous regions");
3464 
3465   if (_cm->verbose_low()) {
3466     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3467                            _worker_id, p2i(hr));
3468   }
3469 
3470   _curr_region  = hr;
3471   _finger       = hr->bottom();
3472   update_region_limit();
3473 }
3474 
3475 void CMTask::update_region_limit() {
3476   HeapRegion* hr            = _curr_region;
3477   HeapWord* bottom          = hr->bottom();
3478   HeapWord* limit           = hr->next_top_at_mark_start();
3479 
3480   if (limit == bottom) {
3481     if (_cm->verbose_low()) {
3482       gclog_or_tty->print_cr("[%u] found an empty region "
3483                              "["PTR_FORMAT", "PTR_FORMAT")",
3484                              _worker_id, p2i(bottom), p2i(limit));
3485     }
3486     // The region was collected underneath our feet.
3487     // We set the finger to bottom to ensure that the bitmap
3488     // iteration that will follow this will not do anything.
3489     // (this is not a condition that holds when we set the region up,
3490     // as the region is not supposed to be empty in the first place)
3491     _finger = bottom;
3492   } else if (limit >= _region_limit) {
3493     assert(limit >= _finger, "peace of mind");
3494   } else {
3495     assert(limit < _region_limit, "only way to get here");
3496     // This can happen under some pretty unusual circumstances.  An
3497     // evacuation pause empties the region underneath our feet (NTAMS
3498     // at bottom). We then do some allocation in the region (NTAMS
3499     // stays at bottom), followed by the region being used as a GC
3500     // alloc region (NTAMS will move to top() and the objects
3501     // originally below it will be grayed). All objects now marked in
3502     // the region are explicitly grayed, if below the global finger,
3503     // and in fact we do not need to scan anything else. So, we simply
3504     // set _finger to be limit to ensure that the bitmap iteration
3505     // doesn't do anything.
3506     _finger = limit;
3507   }
3508 
3509   _region_limit = limit;
3510 }
3511 
3512 void CMTask::giveup_current_region() {
3513   assert(_curr_region != NULL, "invariant");
3514   if (_cm->verbose_low()) {
3515     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3516                            _worker_id, p2i(_curr_region));
3517   }
3518   clear_region_fields();
3519 }
3520 
3521 void CMTask::clear_region_fields() {
3522   // Values for these three fields that indicate that we're not
3523   // holding on to a region.
3524   _curr_region   = NULL;
3525   _finger        = NULL;
3526   _region_limit  = NULL;
3527 }
3528 
3529 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3530   if (cm_oop_closure == NULL) {
3531     assert(_cm_oop_closure != NULL, "invariant");
3532   } else {
3533     assert(_cm_oop_closure == NULL, "invariant");
3534   }
3535   _cm_oop_closure = cm_oop_closure;
3536 }
3537 
3538 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3539   guarantee(nextMarkBitMap != NULL, "invariant");
3540 
3541   if (_cm->verbose_low()) {
3542     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3543   }
3544 
3545   _nextMarkBitMap                = nextMarkBitMap;
3546   clear_region_fields();
3547 
3548   _calls                         = 0;
3549   _elapsed_time_ms               = 0.0;
3550   _termination_time_ms           = 0.0;
3551   _termination_start_time_ms     = 0.0;
3552 
3553 #if _MARKING_STATS_
3554   _aborted                       = 0;
3555   _aborted_overflow              = 0;
3556   _aborted_cm_aborted            = 0;
3557   _aborted_yield                 = 0;
3558   _aborted_timed_out             = 0;
3559   _aborted_satb                  = 0;
3560   _aborted_termination           = 0;
3561   _steal_attempts                = 0;
3562   _steals                        = 0;
3563   _local_pushes                  = 0;
3564   _local_pops                    = 0;
3565   _local_max_size                = 0;
3566   _objs_scanned                  = 0;
3567   _global_pushes                 = 0;
3568   _global_pops                   = 0;
3569   _global_max_size               = 0;
3570   _global_transfers_to           = 0;
3571   _global_transfers_from         = 0;
3572   _regions_claimed               = 0;
3573   _objs_found_on_bitmap          = 0;
3574   _satb_buffers_processed        = 0;
3575 #endif // _MARKING_STATS_
3576 }
3577 
3578 bool CMTask::should_exit_termination() {
3579   regular_clock_call();
3580   // This is called when we are in the termination protocol. We should
3581   // quit if, for some reason, this task wants to abort or the global
3582   // stack is not empty (this means that we can get work from it).
3583   return !_cm->mark_stack_empty() || has_aborted();
3584 }
3585 
3586 void CMTask::reached_limit() {
3587   assert(_words_scanned >= _words_scanned_limit ||
3588          _refs_reached >= _refs_reached_limit,
3589          "shouldn't have been called otherwise");
3590   regular_clock_call();
3591 }
3592 
3593 void CMTask::regular_clock_call() {
3594   if (has_aborted()) return;
3595 
3596   // First, we need to recalculate the words scanned and refs reached
3597   // limits for the next clock call.
3598   recalculate_limits();
3599 
3600   // During the regular clock call we do the following
3601 
3602   // (1) If an overflow has been flagged, then we abort.
3603   if (_cm->has_overflown()) {
3604     set_has_aborted();
3605     return;
3606   }
3607 
3608   // If we are not concurrent (i.e. we're doing remark) we don't need
3609   // to check anything else. The other steps are only needed during
3610   // the concurrent marking phase.
3611   if (!concurrent()) return;
3612 
3613   // (2) If marking has been aborted for Full GC, then we also abort.
3614   if (_cm->has_aborted()) {
3615     set_has_aborted();
3616     statsOnly( ++_aborted_cm_aborted );
3617     return;
3618   }
3619 
3620   double curr_time_ms = os::elapsedVTime() * 1000.0;
3621 
3622   // (3) If marking stats are enabled, then we update the step history.
3623 #if _MARKING_STATS_
3624   if (_words_scanned >= _words_scanned_limit) {
3625     ++_clock_due_to_scanning;
3626   }
3627   if (_refs_reached >= _refs_reached_limit) {
3628     ++_clock_due_to_marking;
3629   }
3630 
3631   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3632   _interval_start_time_ms = curr_time_ms;
3633   _all_clock_intervals_ms.add(last_interval_ms);
3634 
3635   if (_cm->verbose_medium()) {
3636       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3637                         "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
3638                         _worker_id, last_interval_ms,
3639                         _words_scanned,
3640                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3641                         _refs_reached,
3642                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3643   }
3644 #endif // _MARKING_STATS_
3645 
3646   // (4) We check whether we should yield. If we have to, then we abort.
3647   if (SuspendibleThreadSet::should_yield()) {
3648     // We should yield. To do this we abort the task. The caller is
3649     // responsible for yielding.
3650     set_has_aborted();
3651     statsOnly( ++_aborted_yield );
3652     return;
3653   }
3654 
3655   // (5) We check whether we've reached our time quota. If we have,
3656   // then we abort.
3657   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3658   if (elapsed_time_ms > _time_target_ms) {
3659     set_has_aborted();
3660     _has_timed_out = true;
3661     statsOnly( ++_aborted_timed_out );
3662     return;
3663   }
3664 
3665   // (6) Finally, we check whether there are enough completed SATB
3666   // buffers available for processing. If there are, we abort.
3667   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3668   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3669     if (_cm->verbose_low()) {
3670       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3671                              _worker_id);
3672     }
3673     // we do need to process SATB buffers, we'll abort and restart
3674     // the marking task to do so
3675     set_has_aborted();
3676     statsOnly( ++_aborted_satb );
3677     return;
3678   }
3679 }
3680 
3681 void CMTask::recalculate_limits() {
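       // Advance both limits one full period past the work done so far, so
       // that the work-based clock next fires after roughly
       // words_scanned_period more words have been scanned or
       // refs_reached_period more references have been reached.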
3682   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3683   _words_scanned_limit      = _real_words_scanned_limit;
3684 
3685   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3686   _refs_reached_limit       = _real_refs_reached_limit;
3687 }
3688 
3689 void CMTask::decrease_limits() {
3690   // This is called when we believe that we're going to do an infrequent
3691   // operation which will increase the per byte scanned cost (i.e. move
3692   // entries to/from the global stack). It basically tries to decrease the
3693   // scanning limit so that the clock is called earlier.
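       // Concretely, the new limits end up one quarter of a period above the
       // point at which the limits were last recalculated, so the next clock
       // call comes much sooner than it otherwise would.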
3694 
3695   if (_cm->verbose_medium()) {
3696     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3697   }
3698 
3699   _words_scanned_limit = _real_words_scanned_limit -
3700     3 * words_scanned_period / 4;
3701   _refs_reached_limit  = _real_refs_reached_limit -
3702     3 * refs_reached_period / 4;
3703 }
3704 
3705 void CMTask::move_entries_to_global_stack() {
3706   // local array where we'll store the entries that will be popped
3707   // from the local queue
3708   oop buffer[global_stack_transfer_size];
3709 
3710   int n = 0;
3711   oop obj;
3712   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3713     buffer[n] = obj;
3714     ++n;
3715   }
3716 
3717   if (n > 0) {
3718     // we popped at least one entry from the local queue
3719 
3720     statsOnly( ++_global_transfers_to; _local_pops += n );
3721 
3722     if (!_cm->mark_stack_push(buffer, n)) {
3723       if (_cm->verbose_low()) {
3724         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3725                                _worker_id);
3726       }
3727       set_has_aborted();
3728     } else {
3729       // the transfer was successful
3730 
3731       if (_cm->verbose_medium()) {
3732         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3733                                _worker_id, n);
3734       }
3735       statsOnly( size_t tmp_size = _cm->mark_stack_size();
3736                  if (tmp_size > _global_max_size) {
3737                    _global_max_size = tmp_size;
3738                  }
3739                  _global_pushes += n );
3740     }
3741   }
3742 
3743   // this operation was quite expensive, so decrease the limits
3744   decrease_limits();
3745 }
3746 
3747 void CMTask::get_entries_from_global_stack() {
3748   // local array where we'll store the entries that will be popped
3749   // from the global stack.
3750   oop buffer[global_stack_transfer_size];
3751   int n;
3752   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3753   assert(n <= global_stack_transfer_size,
3754          "we should not pop more than the given limit");
3755   if (n > 0) {
3756     // yes, we did actually pop at least one entry
3757 
3758     statsOnly( ++_global_transfers_from; _global_pops += n );
3759     if (_cm->verbose_medium()) {
3760       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3761                              _worker_id, n);
3762     }
3763     for (int i = 0; i < n; ++i) {
3764       bool success = _task_queue->push(buffer[i]);
3765       // We only call this when the local queue is empty or under a
3766       // given target limit. So, we do not expect this push to fail.
3767       assert(success, "invariant");
3768     }
3769 
3770     statsOnly( size_t tmp_size = (size_t)_task_queue->size();
3771                if (tmp_size > _local_max_size) {
3772                  _local_max_size = tmp_size;
3773                }
3774                _local_pushes += n );
3775   }
3776 
3777   // this operation was quite expensive, so decrease the limits
3778   decrease_limits();
3779 }
3780 
3781 void CMTask::drain_local_queue(bool partially) {
3782   if (has_aborted()) return;
3783 
3784   // Decide what the target size is, depending whether we're going to
3785   // drain it partially (so that other tasks can steal if they run out
3786   // of things to do) or totally (at the very end).
3787   size_t target_size;
3788   if (partially) {
3789     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
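         // i.e. drain down to at most a third of the queue's capacity, capped
         // at GCDrainStackTargetSize entries, so that some entries remain
         // available for other tasks to steal.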
3790   } else {
3791     target_size = 0;
3792   }
3793 
3794   if (_task_queue->size() > target_size) {
3795     if (_cm->verbose_high()) {
3796       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3797                              _worker_id, target_size);
3798     }
3799 
3800     oop obj;
3801     bool ret = _task_queue->pop_local(obj);
3802     while (ret) {
3803       statsOnly( ++_local_pops );
3804 
3805       if (_cm->verbose_high()) {
3806         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3807                                p2i((void*) obj));
3808       }
3809 
3810       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3811       assert(!_g1h->is_on_master_free_list(
3812                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3813 
3814       scan_object(obj);
3815 
3816       if (_task_queue->size() <= target_size || has_aborted()) {
3817         ret = false;
3818       } else {
3819         ret = _task_queue->pop_local(obj);
3820       }
3821     }
3822 
3823     if (_cm->verbose_high()) {
3824       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3825                              _worker_id, _task_queue->size());
3826     }
3827   }
3828 }
3829 
3830 void CMTask::drain_global_stack(bool partially) {
3831   if (has_aborted()) return;
3832 
3833   // We have a policy to drain the local queue before we attempt to
3834   // drain the global stack.
3835   assert(partially || _task_queue->size() == 0, "invariant");
3836 
3837   // Decide what the target size is, depending whether we're going to
3838   // drain it partially (so that other tasks can steal if they run out
3839   // of things to do) or totally (at the very end).  Notice that,
3840   // because we move entries from the global stack in chunks or
3841   // because another task might be doing the same, we might in fact
3842   // drop below the target. But, this is not a problem.
3843   size_t target_size;
3844   if (partially) {
3845     target_size = _cm->partial_mark_stack_size_target();
3846   } else {
3847     target_size = 0;
3848   }
3849 
3850   if (_cm->mark_stack_size() > target_size) {
3851     if (_cm->verbose_low()) {
3852       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3853                              _worker_id, target_size);
3854     }
3855 
3856     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3857       get_entries_from_global_stack();
3858       drain_local_queue(partially);
3859     }
3860 
3861     if (_cm->verbose_low()) {
3862       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3863                              _worker_id, _cm->mark_stack_size());
3864     }
3865   }
3866 }
3867 
3868 // The SATB queue code makes several assumptions about whether the par or
3869 // non-par versions of its methods are called. This is why some of the code is
3870 // replicated. We should really get rid of the single-threaded version
3871 // of the code to simplify things.
3872 void CMTask::drain_satb_buffers() {
3873   if (has_aborted()) return;
3874 
3875   // We set this so that the regular clock knows that we're in the
3876   // middle of draining buffers and doesn't set the abort flag when it
3877   // notices that SATB buffers are available for draining. It'd be
3878   // very counterproductive if it did that. :-)
3879   _draining_satb_buffers = true;
3880 
3881   CMObjectClosure oc(this);
3882   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3883   satb_mq_set.set_closure(_worker_id, &oc);
3884 
3885   // This keeps claiming and applying the closure to completed buffers
3886   // until we run out of buffers or we need to abort.
3887   while (!has_aborted() &&
3888          satb_mq_set.apply_closure_to_completed_buffer(_worker_id)) {
3889     if (_cm->verbose_medium()) {
3890       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3891     }
3892     statsOnly( ++_satb_buffers_processed );
3893     regular_clock_call();
3894   }
3895 
3896   _draining_satb_buffers = false;
3897 
3898   assert(has_aborted() ||
3899          concurrent() ||
3900          satb_mq_set.completed_buffers_num() == 0, "invariant");
3901 
3902   satb_mq_set.set_closure(_worker_id, NULL);
3903 
3904   // again, this was a potentially expensive operation, decrease the
3905   // limits to get the regular clock call early
3906   decrease_limits();
3907 }
3908 
3909 void CMTask::print_stats() {
3910   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3911                          _worker_id, _calls);
3912   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3913                          _elapsed_time_ms, _termination_time_ms);
3914   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3915                          _step_times_ms.num(), _step_times_ms.avg(),
3916                          _step_times_ms.sd());
3917   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3918                          _step_times_ms.maximum(), _step_times_ms.sum());
3919 
3920 #if _MARKING_STATS_
3921   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3922                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3923                          _all_clock_intervals_ms.sd());
3924   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3925                          _all_clock_intervals_ms.maximum(),
3926                          _all_clock_intervals_ms.sum());
3927   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
3928                          _clock_due_to_scanning, _clock_due_to_marking);
3929   gclog_or_tty->print_cr("  Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
3930                          _objs_scanned, _objs_found_on_bitmap);
3931   gclog_or_tty->print_cr("  Local Queue:  pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3932                          _local_pushes, _local_pops, _local_max_size);
3933   gclog_or_tty->print_cr("  Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3934                          _global_pushes, _global_pops, _global_max_size);
3935   gclog_or_tty->print_cr("                transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
3936                          _global_transfers_to, _global_transfers_from);
3937   gclog_or_tty->print_cr("  Regions: claimed = " SIZE_FORMAT, _regions_claimed);
3938   gclog_or_tty->print_cr("  SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
3939   gclog_or_tty->print_cr("  Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
3940                          _steal_attempts, _steals);
3941   gclog_or_tty->print_cr("  Aborted: " SIZE_FORMAT ", due to", _aborted);
3942   gclog_or_tty->print_cr("    overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
3943                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3944   gclog_or_tty->print_cr("    time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
3945                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3946 #endif // _MARKING_STATS_
3947 }
3948 
3949 /*****************************************************************************
3950 
3951     The do_marking_step(time_target_ms, ...) method is the building
3952     block of the parallel marking framework. It can be called in parallel
3953     with other invocations of do_marking_step() on different tasks
3954     (but only one per task, obviously) and concurrently with the
3955     mutator threads, or during remark, hence it eliminates the need
3956     for two versions of the code. When called during remark, it will
3957     pick up from where the task left off during the concurrent marking
3958     phase. Interestingly, tasks are also claimable during evacuation
3959     pauses, since do_marking_step() ensures that it aborts before
3960     it needs to yield.
3961 
3962     The data structures that it uses to do marking work are the
3963     following:
3964 
3965       (1) Marking Bitmap. If there are gray objects that appear only
3966       on the bitmap (this happens either when dealing with an overflow
3967       or when the initial marking phase has simply marked the roots
3968       and didn't push them on the stack), then tasks claim heap
3969       regions whose bitmap they then scan to find gray objects. A
3970       global finger indicates where the end of the last claimed region
3971       is. A local finger indicates how far into the region a task has
3972       scanned. The two fingers are used to determine how to gray an
3973       object (i.e. whether simply marking it is OK, as it will be
3974       visited by a task in the future, or whether it needs to be also
3975       pushed on a stack).
3976 
3977       (2) Local Queue. The local queue of the task which is accessed
3978       reasonably efficiently by the task. Other tasks can steal from
3979       it when they run out of work. Throughout the marking phase, a
3980       task attempts to keep its local queue short but not totally
3981       empty, so that entries are available for stealing by other
3982       tasks. Only when there is no more work, a task will totally
3983       drain its local queue.
3984 
3985       (3) Global Mark Stack. This handles local queue overflow. During
3986       marking only sets of entries are moved between it and the local
3987       queues, as access to it requires a mutex and finer-grained
3988       interaction with it might cause contention. If it
3989       overflows, then the marking phase should restart and iterate
3990       over the bitmap to identify gray objects. Throughout the marking
3991       phase, tasks attempt to keep the global mark stack at a small
3992       length but not totally empty, so that entries are available for
3993       popping by other tasks. Only when there is no more work, tasks
3994       will totally drain the global mark stack.
3995 
3996       (4) SATB Buffer Queue. This is where completed SATB buffers are
3997       made available. Buffers are regularly removed from this queue
3998       and scanned for roots, so that the queue doesn't get too
3999       long. During remark, all completed buffers are processed, as
4000       well as the filled in parts of any uncompleted buffers.
4001 
4002     The do_marking_step() method tries to abort when the time target
4003     has been reached. There are a few other cases when the
4004     do_marking_step() method also aborts:
4005 
4006       (1) When the marking phase has been aborted (after a Full GC).
4007 
4008       (2) When a global overflow (on the global stack) has been
4009       triggered. Before the task aborts, it will actually sync up with
4010       the other tasks to ensure that all the marking data structures
4011       (local queues, stacks, fingers etc.)  are re-initialized so that
4012       when do_marking_step() completes, the marking phase can
4013       immediately restart.
4014 
4015       (3) When enough completed SATB buffers are available. The
4016       do_marking_step() method only tries to drain SATB buffers right
4017       at the beginning. So, if enough buffers are available, the
4018       marking step aborts and the SATB buffers are processed at
4019       the beginning of the next invocation.
4020 
4021       (4) To yield. When we have to yield then we abort and yield
4022       right at the end of do_marking_step(). This saves us from a lot
4023       of hassle as, by yielding, we might allow a Full GC. If this
4024       happens then objects will be compacted underneath our feet, the
4025       heap might shrink, etc. We save checking for this by just
4026       aborting and doing the yield right at the end.
4027 
4028     From the above it follows that the do_marking_step() method should
4029     be called in a loop (or, otherwise, regularly) until it completes.
4030 
4031     If a marking step completes without its has_aborted() flag being
4032     true, it means it has completed the current marking phase (and
4033     also all other marking tasks have done so and have all synced up).
4034 
4035     A method called regular_clock_call() is invoked "regularly" (in
4036     sub ms intervals) throughout marking. It is this clock method that
4037     checks all the abort conditions which were mentioned above and
4038     decides when the task should abort. A work-based scheme is used to
4039     trigger this clock method: when the number of object words the
4040     marking phase has scanned or the number of references the marking
4041     phase has visited reach a given limit. Additional invocations of
4042     the clock method have been planted in a few other strategic places
4043     too. The initial reason for the clock method was to avoid calling
4044     vtime too regularly, as it is quite expensive. So, once it was in
4045     place, it was natural to piggy-back all the other conditions on it
4046     too and not constantly check them throughout the code.
4047 
4048     If do_termination is true then do_marking_step will enter its
4049     termination protocol.
4050 
4051     The value of is_serial must be true when do_marking_step is being
4052     called serially (i.e. by the VMThread) and do_marking_step should
4053     skip any synchronization in the termination and overflow code.
4054     Examples include the serial remark code and the serial reference
4055     processing closures.
4056 
4057     The value of is_serial must be false when do_marking_step is
4058     being called by any of the worker threads in a work gang.
4059     Examples include the concurrent marking code (CMMarkingTask),
4060     the MT remark code, and the MT reference processing closures.
4061 
4062  *****************************************************************************/
4063 
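     // An illustrative sketch (not the actual calling code; 'target_ms' and
     // 'cm' are placeholders) of the calling loop described above, roughly as
     // a marking worker might drive it:
     //
     //   do {
     //     task->do_marking_step(target_ms,
     //                           true  /* do_termination */,
     //                           false /* is_serial */);
     //     // If the step aborted in order to yield, yield here (and handle
     //     // overflow / full-GC abort) before retrying.
     //   } while (task->has_aborted() && !cm->has_aborted());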
4064 void CMTask::do_marking_step(double time_target_ms,
4065                              bool do_termination,
4066                              bool is_serial) {
4067   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4068   assert(concurrent() == _cm->concurrent(), "they should be the same");
4069 
4070   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4071   assert(_task_queues != NULL, "invariant");
4072   assert(_task_queue != NULL, "invariant");
4073   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4074 
4075   assert(!_claimed,
4076          "only one thread should claim this task at any one time");
4077 
4078   // OK, this doesn't safeguard against all possible scenarios, as it is
4079   // possible for two threads to set the _claimed flag at the same
4080   // time. But it is only for debugging purposes anyway and it will
4081   // catch most problems.
4082   _claimed = true;
4083 
4084   _start_time_ms = os::elapsedVTime() * 1000.0;
4085   statsOnly( _interval_start_time_ms = _start_time_ms );
4086 
4087   // If do_stealing is true then do_marking_step will attempt to
4088   // steal work from the other CMTasks. It only makes sense to
4089   // enable stealing when the termination protocol is enabled
4090   // and do_marking_step() is not being called serially.
4091   bool do_stealing = do_termination && !is_serial;
4092 
4093   double diff_prediction_ms =
4094     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4095   _time_target_ms = time_target_ms - diff_prediction_ms;
4096 
4097   // set up the variables that are used in the work-based scheme to
4098   // call the regular clock method
4099   _words_scanned = 0;
4100   _refs_reached  = 0;
4101   recalculate_limits();
4102 
4103   // clear all flags
4104   clear_has_aborted();
4105   _has_timed_out = false;
4106   _draining_satb_buffers = false;
4107 
4108   ++_calls;
4109 
4110   if (_cm->verbose_low()) {
4111     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4112                            "target = %1.2lfms >>>>>>>>>>",
4113                            _worker_id, _calls, _time_target_ms);
4114   }
4115 
4116   // Set up the bitmap and oop closures. Anything that uses them is
4117   // eventually called from this method, so it is OK to allocate them
4118   // on the stack here.
4119   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4120   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4121   set_cm_oop_closure(&cm_oop_closure);
4122 
4123   if (_cm->has_overflown()) {
4124     // This can happen if the mark stack overflows during a GC pause
4125     // and this task, after a yield point, restarts. We have to abort
4126     // as we need to get into the overflow protocol which happens
4127     // right at the end of this task.
4128     set_has_aborted();
4129   }
4130 
4131   // First drain any available SATB buffers. After this, we will not
4132   // look at SATB buffers before the next invocation of this method.
4133   // If enough completed SATB buffers are queued up, the regular clock
4134   // will abort this task so that it restarts.
4135   drain_satb_buffers();
4136   // ...then partially drain the local queue and the global stack
4137   drain_local_queue(true);
4138   drain_global_stack(true);
4139 
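       // Main loop: scan the region we are currently holding (if any),
       // partially drain the local queue and global stack, then try to
       // claim a new region; repeat until we run out of regions or abort.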
4140   do {
4141     if (!has_aborted() && _curr_region != NULL) {
4142       // This means that we're already holding on to a region.
4143       assert(_finger != NULL, "if region is not NULL, then the finger "
4144              "should not be NULL either");
4145 
4146       // We might have restarted this task after an evacuation pause
4147       // which might have evacuated the region we're holding on to
4148       // underneath our feet. Let's read its limit again to make sure
4149       // that we do not iterate over a region of the heap that
4150       // contains garbage (update_region_limit() will also move
4151       // _finger to the start of the region if it is found empty).
4152       update_region_limit();
4153       // We will start from _finger not from the start of the region,
4154       // as we might be restarting this task after aborting half-way
4155       // through scanning this region. In this case, _finger points to
4156       // the address where we last found a marked object. If this is a
4157       // fresh region, _finger points to start().
4158       MemRegion mr = MemRegion(_finger, _region_limit);
4159 
4160       if (_cm->verbose_low()) {
4161         gclog_or_tty->print_cr("[%u] we're scanning part "
4162                                "["PTR_FORMAT", "PTR_FORMAT") "
4163                                "of region "HR_FORMAT,
4164                                _worker_id, p2i(_finger), p2i(_region_limit),
4165                                HR_FORMAT_PARAMS(_curr_region));
4166       }
4167 
4168       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
4169              "humongous regions should go around loop once only");
4170 
4171       // Some special cases:
4172       // If the memory region is empty, we can just give up the region.
4173       // If the current region is humongous then we only need to check
4174       // the bitmap for the bit associated with the start of the object,
4175       // scan the object if it's live, and give up the region.
4176       // Otherwise, let's iterate over the bitmap of the part of the region
4177       // that is left.
4178       // If the iteration is successful, give up the region.
4179       if (mr.is_empty()) {
4180         giveup_current_region();
4181         regular_clock_call();
4182       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
4183         if (_nextMarkBitMap->isMarked(mr.start())) {
4184           // The object is marked - apply the closure
4185           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4186           bitmap_closure.do_bit(offset);
4187         }
4188         // Even if this task aborted while scanning the humongous object
4189         // we can (and should) give up the current region.
4190         giveup_current_region();
4191         regular_clock_call();
4192       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4193         giveup_current_region();
4194         regular_clock_call();
4195       } else {
4196         assert(has_aborted(), "currently the only way to do so");
4197         // The only way to abort the bitmap iteration is to return
4198         // false from the do_bit() method. However, inside the
4199         // do_bit() method we move the _finger to point to the
4200         // object currently being looked at. So, if we bail out, we
4201         // have definitely set _finger to something non-null.
4202         assert(_finger != NULL, "invariant");
4203 
4204         // Region iteration was actually aborted. So now _finger
4205         // points to the address of the object we last scanned. If we
4206         // leave it there, when we restart this task, we will rescan
4207         // the object. It is easy to avoid this. We move the finger by
4208         // enough to point to the next possible object header (the
4209         // bitmap knows by how much we need to move it as it knows its
4210         // granularity).
4211         assert(_finger < _region_limit, "invariant");
4212         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4213         // Check if bitmap iteration was aborted while scanning the last object
4214         if (new_finger >= _region_limit) {
4215           giveup_current_region();
4216         } else {
4217           move_finger_to(new_finger);
4218         }
4219       }
4220     }
4221     // At this point we have either completed iterating over the
4222     // region we were holding on to, or we have aborted.
4223 
4224     // We then partially drain the local queue and the global stack.
4225     // (Do we really need this?)
4226     drain_local_queue(true);
4227     drain_global_stack(true);
4228 
4229     // Read the note on the claim_region() method on why it might
4230     // return NULL with potentially more regions available for
4231     // claiming and why we have to check out_of_regions() to determine
4232     // whether we're done or not.
4233     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4234       // We are going to try to claim a new region. We should have
4235       // given up on the previous one.
4236       // Separated the asserts so that we know which one fires.
4237       assert(_curr_region  == NULL, "invariant");
4238       assert(_finger       == NULL, "invariant");
4239       assert(_region_limit == NULL, "invariant");
4240       if (_cm->verbose_low()) {
4241         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4242       }
4243       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4244       if (claimed_region != NULL) {
4245         // Yes, we managed to claim one
4246         statsOnly( ++_regions_claimed );
4247 
4248         if (_cm->verbose_low()) {
4249           gclog_or_tty->print_cr("[%u] we successfully claimed "
4250                                  "region "PTR_FORMAT,
4251                                  _worker_id, p2i(claimed_region));
4252         }
4253 
4254         setup_for_region(claimed_region);
4255         assert(_curr_region == claimed_region, "invariant");
4256       }
4257       // It is important to call the regular clock here. It might take
4258       // a while to claim a region if, for example, we hit a large
4259       // block of empty regions. So we need to call the regular clock
4260       // method once per iteration of this loop to make sure it's
4261       // called frequently enough.
4262       regular_clock_call();
4263     }
4264 
4265     if (!has_aborted() && _curr_region == NULL) {
4266       assert(_cm->out_of_regions(),
4267              "at this point we should be out of regions");
4268     }
4269   } while ( _curr_region != NULL && !has_aborted());
4270 
4271   if (!has_aborted()) {
4272     // We cannot check whether the global stack is empty, since other
4273     // tasks might be pushing objects to it concurrently.
4274     assert(_cm->out_of_regions(),
4275            "at this point we should be out of regions");
4276 
4277     if (_cm->verbose_low()) {
4278       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4279     }
4280 
4281     // Try to reduce the number of available SATB buffers so that
4282     // remark has less work to do.
4283     drain_satb_buffers();
4284   }
4285 
4286   // Since we've done everything else, we can now totally drain the
4287   // local queue and global stack.
4288   drain_local_queue(false);
4289   drain_global_stack(false);
4290 
4291   // Attempt to steal work from other tasks' queues.
4292   if (do_stealing && !has_aborted()) {
4293     // We have not aborted. This means that we have finished all that
4294     // we could. Let's try to do some stealing...
4295 
4296     // We cannot check whether the global stack is empty, since other
4297     // tasks might be pushing objects to it concurrently.
4298     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4299            "only way to reach here");
4300 
4301     if (_cm->verbose_low()) {
4302       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4303     }
4304 
4305     while (!has_aborted()) {
4306       oop obj;
4307       statsOnly( ++_steal_attempts );
4308 
4309       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4310         if (_cm->verbose_medium()) {
4311           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4312                                  _worker_id, p2i((void*) obj));
4313         }
4314 
4315         statsOnly( ++_steals );
4316 
4317         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4318                "any stolen object should be marked");
4319         scan_object(obj);
4320 
4321         // And since we're towards the end, let's totally drain the
4322         // local queue and global stack.
4323         drain_local_queue(false);
4324         drain_global_stack(false);
4325       } else {
4326         break;
4327       }
4328     }
4329   }
4330 
4331   // If we are about to wrap up and go into termination, check if we
4332   // should raise the overflow flag.
4333   if (do_termination && !has_aborted()) {
4334     if (_cm->force_overflow()->should_force()) {
4335       _cm->set_has_overflown();
4336       regular_clock_call();
4337     }
4338   }
4339 
4340   // We still haven't aborted. Now, let's try to get into the
4341   // termination protocol.
4342   if (do_termination && !has_aborted()) {
4343     // We cannot check whether the global stack is empty, since other
4344     // tasks might be concurrently pushing objects on it.
4345     // Separated the asserts so that we know which one fires.
4346     assert(_cm->out_of_regions(), "only way to reach here");
4347     assert(_task_queue->size() == 0, "only way to reach here");
4348 
4349     if (_cm->verbose_low()) {
4350       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4351     }
4352 
4353     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4354 
4355     // The CMTask class also extends the TerminatorTerminator class,
4356     // so its should_exit_termination() method will also decide
4357     // whether to exit the termination protocol.
4358     bool finished = (is_serial ||
4359                      _cm->terminator()->offer_termination(this));
4360     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4361     _termination_time_ms +=
4362       termination_end_time_ms - _termination_start_time_ms;
4363 
4364     if (finished) {
4365       // We're all done.
4366 
4367       if (_worker_id == 0) {
4368         // let's allow task 0 to do this
4369         if (concurrent()) {
4370           assert(_cm->concurrent_marking_in_progress(), "invariant");
4371           // we need to set this to false before the next
4372           // safepoint. This way we ensure that the marking phase
4373           // doesn't observe any more heap expansions.
4374           _cm->clear_concurrent_marking_in_progress();
4375         }
4376       }
4377 
4378       // We can now guarantee that the global stack is empty, since
4379       // all other tasks have finished. We separated the guarantees so
4380       // that, if a condition is false, we can immediately find out
4381       // which one.
4382       guarantee(_cm->out_of_regions(), "only way to reach here");
4383       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4384       guarantee(_task_queue->size() == 0, "only way to reach here");
4385       guarantee(!_cm->has_overflown(), "only way to reach here");
4386       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4387 
4388       if (_cm->verbose_low()) {
4389         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4390       }
4391     } else {
4392       // Apparently there's more work to do. Let's abort this task. The
4393       // caller will restart it and we can hopefully find more things to do.
4394 
4395       if (_cm->verbose_low()) {
4396         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4397                                _worker_id);
4398       }
4399 
4400       set_has_aborted();
4401       statsOnly( ++_aborted_termination );
4402     }
4403   }
4404 
4405   // Mainly for debugging purposes, to make sure that a pointer to the
4406   // closure which was allocated on this method's stack frame doesn't
4407   // escape it by accident.
4408   set_cm_oop_closure(NULL);
4409   double end_time_ms = os::elapsedVTime() * 1000.0;
4410   double elapsed_time_ms = end_time_ms - _start_time_ms;
4411   // Update the step history.
4412   _step_times_ms.add(elapsed_time_ms);
4413 
4414   if (has_aborted()) {
4415     // The task was aborted for some reason.
4416 
4417     statsOnly( ++_aborted );
4418 
4419     if (_has_timed_out) {
4420       double diff_ms = elapsed_time_ms - _time_target_ms;
4421       // Keep statistics of how well we did with respect to hitting
4422       // our target only if we actually timed out (if we aborted for
4423       // other reasons, then the results might get skewed).
4424       _marking_step_diffs_ms.add(diff_ms);
4425     }
4426 
4427     if (_cm->has_overflown()) {
4428       // This is the interesting one. We aborted because a global
4429       // overflow was raised. This means we have to restart the
4430       // marking phase and start iterating over regions. However, in
4431       // order to do this we have to make sure that all tasks stop
4432       // what they are doing and re-initialize in a safe manner. We
4433       // will achieve this with the use of two barrier sync points.
4434 
4435       if (_cm->verbose_low()) {
4436         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4437       }
4438 
4439       if (!is_serial) {
4440         // We only need to enter the sync barrier if being called
4441         // from a parallel context
4442         _cm->enter_first_sync_barrier(_worker_id);
4443 
4444         // When we exit this sync barrier we know that all tasks have
4445         // stopped doing marking work. So, it's now safe to
4446         // re-initialize our data structures. At the end of this method,
4447         // task 0 will clear the global data structures.
4448       }
4449 
4450       statsOnly( ++_aborted_overflow );
4451 
4452       // We clear the local state of this task...
4453       clear_region_fields();
4454 
4455       if (!is_serial) {
4456         // ...and enter the second barrier.
4457         _cm->enter_second_sync_barrier(_worker_id);
4458       }
4459       // At this point, if we're during the concurrent phase of
4460       // marking, everything has been re-initialized and we're
4461       // ready to restart.
4462     }
4463 
4464     if (_cm->verbose_low()) {
4465       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4466                              "elapsed = %1.2lfms <<<<<<<<<<",
4467                              _worker_id, _time_target_ms, elapsed_time_ms);
4468       if (_cm->has_aborted()) {
4469         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4470                                _worker_id);
4471       }
4472     }
4473   } else {
4474     if (_cm->verbose_low()) {
4475       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4476                              "elapsed = %1.2lfms <<<<<<<<<<",
4477                              _worker_id, _time_target_ms, elapsed_time_ms);
4478     }
4479   }
4480 
4481   _claimed = false;
4482 }
4483 
4484 CMTask::CMTask(uint worker_id,
4485                ConcurrentMark* cm,
4486                size_t* marked_bytes,
4487                BitMap* card_bm,
4488                CMTaskQueue* task_queue,
4489                CMTaskQueueSet* task_queues)
4490   : _g1h(G1CollectedHeap::heap()),
4491     _worker_id(worker_id), _cm(cm),
4492     _claimed(false),
4493     _nextMarkBitMap(NULL), _hash_seed(17),
4494     _task_queue(task_queue),
4495     _task_queues(task_queues),
4496     _cm_oop_closure(NULL),
4497     _marked_bytes_array(marked_bytes),
4498     _card_bm(card_bm) {
4499   guarantee(task_queue != NULL, "invariant");
4500   guarantee(task_queues != NULL, "invariant");
4501 
4502   statsOnly( _clock_due_to_scanning = 0;
4503              _clock_due_to_marking  = 0 );
4504 
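       // Seed the step-time difference history with a small initial value,
       // presumably so that the very first prediction in do_marking_step()
       // has a sample to work with (a starting estimate, not a measurement).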
4505   _marking_step_diffs_ms.add(0.5);
4506 }
4507 
4508 // These are formatting macros that are used below to ensure
4509 // consistent formatting. The *_H_* versions are used to format the
4510 // header for a particular value and should be kept consistent with
4511 // the corresponding value macro. Also note that most of the macros
4512 // add the necessary whitespace (as a prefix), which makes them a bit
4513 // easier to compose.
4514 
4515 // All the output lines are prefixed with this string to be able to
4516 // identify them easily in a large log file.
4517 #define G1PPRL_LINE_PREFIX            "###"
4518 
4519 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4520 #ifdef _LP64
4521 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4522 #else // _LP64
4523 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4524 #endif // _LP64
4525 
4526 // For per-region info
4527 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4528 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4529 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4530 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4531 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4532 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4533 
4534 // For summary info
4535 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4536 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4537 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4538 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
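     //
     // For illustration only, a per-region data line assembled from these
     // formats looks roughly like:
     //   ###  OLD  0x00000000f0000000-0x00000000f0100000  1048576  524288  524288  123.4  4096  0
     // i.e. type, address range, used, prev-live, next-live, gc-eff,
     // remset and code-roots, matching the column headers printed below.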
4539 
4540 G1PrintRegionLivenessInfoClosure::
4541 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4542   : _out(out),
4543     _total_used_bytes(0), _total_capacity_bytes(0),
4544     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4545     _hum_used_bytes(0), _hum_capacity_bytes(0),
4546     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4547     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4548   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4549   MemRegion g1_reserved = g1h->g1_reserved();
4550   double now = os::elapsedTime();
4551 
4552   // Print the header of the output.
4553   _out->cr();
4554   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4555   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4556                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4557                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4558                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4559                  HeapRegion::GrainBytes);
4560   _out->print_cr(G1PPRL_LINE_PREFIX);
4561   _out->print_cr(G1PPRL_LINE_PREFIX
4562                 G1PPRL_TYPE_H_FORMAT
4563                 G1PPRL_ADDR_BASE_H_FORMAT
4564                 G1PPRL_BYTE_H_FORMAT
4565                 G1PPRL_BYTE_H_FORMAT
4566                 G1PPRL_BYTE_H_FORMAT
4567                 G1PPRL_DOUBLE_H_FORMAT
4568                 G1PPRL_BYTE_H_FORMAT
4569                 G1PPRL_BYTE_H_FORMAT,
4570                 "type", "address-range",
4571                 "used", "prev-live", "next-live", "gc-eff",
4572                 "remset", "code-roots");
4573   _out->print_cr(G1PPRL_LINE_PREFIX
4574                 G1PPRL_TYPE_H_FORMAT
4575                 G1PPRL_ADDR_BASE_H_FORMAT
4576                 G1PPRL_BYTE_H_FORMAT
4577                 G1PPRL_BYTE_H_FORMAT
4578                 G1PPRL_BYTE_H_FORMAT
4579                 G1PPRL_DOUBLE_H_FORMAT
4580                 G1PPRL_BYTE_H_FORMAT
4581                 G1PPRL_BYTE_H_FORMAT,
4582                 "", "",
4583                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4584                 "(bytes)", "(bytes)");
4585 }
4586 
4587 // Takes a pointer to one of the _hum_* fields, deduces the
4588 // corresponding value for a region in a humongous region series
4589 // (either the region size, or what's left if the _hum_* field is
4590 // smaller than the region size), and updates the _hum_* field accordingly.
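     // For example (illustrative numbers): with a region size of 1M and
     // *hum_bytes initially 2.5M, three successive calls would return 1M,
     // 1M and 0.5M, leaving *hum_bytes at 0.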
4591 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4592   size_t bytes = 0;
4593   // The > 0 check is to deal with the prev and next live bytes which
4594   // could be 0.
4595   if (*hum_bytes > 0) {
4596     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4597     *hum_bytes -= bytes;
4598   }
4599   return bytes;
4600 }
4601 
4602 // Deduces the values for a region in a humongous region series
4603 // from the _hum_* fields and updates those accordingly. It assumes
4604 // that the _hum_* fields have already been set up from the "starts
4605 // humongous" region and that we visit the regions in address order.
4606 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4607                                                      size_t* capacity_bytes,
4608                                                      size_t* prev_live_bytes,
4609                                                      size_t* next_live_bytes) {
4610   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4611   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4612   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4613   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4614   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4615 }
4616 
4617 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4618   const char* type       = r->get_type_str();
4619   HeapWord* bottom       = r->bottom();
4620   HeapWord* end          = r->end();
4621   size_t capacity_bytes  = r->capacity();
4622   size_t used_bytes      = r->used();
4623   size_t prev_live_bytes = r->live_bytes();
4624   size_t next_live_bytes = r->next_live_bytes();
4625   double gc_eff          = r->gc_efficiency();
4626   size_t remset_bytes    = r->rem_set()->mem_size();
4627   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4628 
4629   if (r->is_starts_humongous()) {
4630     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4631            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4632            "they should have been zeroed after the last time we used them");
4633     // Set up the _hum_* fields.
4634     _hum_capacity_bytes  = capacity_bytes;
4635     _hum_used_bytes      = used_bytes;
4636     _hum_prev_live_bytes = prev_live_bytes;
4637     _hum_next_live_bytes = next_live_bytes;
4638     get_hum_bytes(&used_bytes, &capacity_bytes,
4639                   &prev_live_bytes, &next_live_bytes);
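         // For a starts-humongous region, end() extends over the whole
         // humongous series, so clamp the printed address range to a single
         // region's worth; the rest of the series is reported by the
         // continues-humongous regions handled in the branch below.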
4640     end = bottom + HeapRegion::GrainWords;
4641   } else if (r->is_continues_humongous()) {
4642     get_hum_bytes(&used_bytes, &capacity_bytes,
4643                   &prev_live_bytes, &next_live_bytes);
4644     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4645   }
4646 
4647   _total_used_bytes      += used_bytes;
4648   _total_capacity_bytes  += capacity_bytes;
4649   _total_prev_live_bytes += prev_live_bytes;
4650   _total_next_live_bytes += next_live_bytes;
4651   _total_remset_bytes    += remset_bytes;
4652   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4653 
4654   // Print a line for this particular region.
4655   _out->print_cr(G1PPRL_LINE_PREFIX
4656                  G1PPRL_TYPE_FORMAT
4657                  G1PPRL_ADDR_BASE_FORMAT
4658                  G1PPRL_BYTE_FORMAT
4659                  G1PPRL_BYTE_FORMAT
4660                  G1PPRL_BYTE_FORMAT
4661                  G1PPRL_DOUBLE_FORMAT
4662                  G1PPRL_BYTE_FORMAT
4663                  G1PPRL_BYTE_FORMAT,
4664                  type, p2i(bottom), p2i(end),
4665                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4666                  remset_bytes, strong_code_roots_bytes);
4667 
4668   return false;
4669 }
4670 
4671 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4672   // Add the static memory usage to the remembered set size total.
4673   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4674   // Print the footer of the output.
4675   _out->print_cr(G1PPRL_LINE_PREFIX);
4676   _out->print_cr(G1PPRL_LINE_PREFIX
4677                  " SUMMARY"
4678                  G1PPRL_SUM_MB_FORMAT("capacity")
4679                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4680                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4681                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4682                  G1PPRL_SUM_MB_FORMAT("remset")
4683                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4684                  bytes_to_mb(_total_capacity_bytes),
4685                  bytes_to_mb(_total_used_bytes),
4686                  perc(_total_used_bytes, _total_capacity_bytes),
4687                  bytes_to_mb(_total_prev_live_bytes),
4688                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4689                  bytes_to_mb(_total_next_live_bytes),
4690                  perc(_total_next_live_bytes, _total_capacity_bytes),
4691                  bytes_to_mb(_total_remset_bytes),
4692                  bytes_to_mb(_total_strong_code_roots_bytes));
4693   _out->cr();
4694 }