1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  30 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  33 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  34 #include "gc_implementation/g1/g1Log.hpp"
  35 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  36 #include "gc_implementation/g1/g1RemSet.hpp"
  37 #include "gc_implementation/g1/heapRegion.inline.hpp"
  38 #include "gc_implementation/g1/heapRegionManager.inline.hpp"
  39 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  40 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
  41 #include "gc_implementation/shared/vmGCOperations.hpp"
  42 #include "gc_implementation/shared/gcTimer.hpp"
  43 #include "gc_implementation/shared/gcTrace.hpp"
  44 #include "gc_implementation/shared/gcTraceTime.hpp"
  45 #include "memory/allocation.hpp"
  46 #include "memory/genOopClosures.inline.hpp"
  47 #include "memory/referencePolicy.hpp"
  48 #include "memory/resourceArea.hpp"
  49 #include "memory/strongRootsScope.hpp"
  50 #include "oops/oop.inline.hpp"
  51 #include "runtime/handles.inline.hpp"
  52 #include "runtime/java.hpp"
  53 #include "runtime/atomic.inline.hpp"
  54 #include "runtime/prefetch.inline.hpp"
  55 #include "services/memTracker.hpp"
  56 
  57 // Concurrent marking bit map wrapper
  58 
  59 CMBitMapRO::CMBitMapRO(int shifter) :
  60   _bm(),
  61   _shifter(shifter) {
  62   _bmStartWord = 0;
  63   _bmWordSize = 0;
  64 }
  65 
  66 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  67                                                const HeapWord* limit) const {
  68   // First we must round addr *up* to a possible object boundary.
  69   addr = (HeapWord*)align_size_up((intptr_t)addr,
  70                                   HeapWordSize << _shifter);
  71   size_t addrOffset = heapWordToOffset(addr);
  72   if (limit == NULL) {
  73     limit = _bmStartWord + _bmWordSize;
  74   }
  75   size_t limitOffset = heapWordToOffset(limit);
  76   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  77   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  78   assert(nextAddr >= addr, "get_next_one postcondition");
  79   assert(nextAddr == limit || isMarked(nextAddr),
  80          "get_next_one postcondition");
  81   return nextAddr;
  82 }
  83 
  84 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  85                                                  const HeapWord* limit) const {
  86   size_t addrOffset = heapWordToOffset(addr);
  87   if (limit == NULL) {
  88     limit = _bmStartWord + _bmWordSize;
  89   }
  90   size_t limitOffset = heapWordToOffset(limit);
  91   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  92   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  93   assert(nextAddr >= addr, "get_next_one postcondition");
  94   assert(nextAddr == limit || !isMarked(nextAddr),
  95          "get_next_one postcondition");
  96   return nextAddr;
  97 }
  98 
  99 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 100   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 101   return (int) (diff >> _shifter);
 102 }
 103 
 104 #ifndef PRODUCT
 105 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 106   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 107   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 108          "size inconsistency");
 109   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 110          _bmWordSize  == heap_rs.word_size();
 111 }
 112 #endif
 113 
 114 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 115   _bm.print_on_error(st, prefix);
 116 }
 117 
 118 size_t CMBitMap::compute_size(size_t heap_size) {
 119   return heap_size / mark_distance();
 120 }
 121 
 122 size_t CMBitMap::mark_distance() {
 123   return MinObjAlignmentInBytes * BitsPerByte;
 124 }
 125 
 126 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 127   _bmStartWord = heap.start();
 128   _bmWordSize = heap.word_size();
 129 
 130   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 131   _bm.set_size(_bmWordSize >> _shifter);
 132 
 133   storage->set_mapping_changed_listener(&_listener);
 134 }
 135 
 136 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 137   if (zero_filled) {
 138     return;
 139   }
 140   // We need to clear the bitmap on commit, removing any existing information.
 141   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 142   _bm->clearRange(mr);
 143 }
 144 
 145 // Closure used for clearing the given mark bitmap.
 146 class ClearBitmapHRClosure : public HeapRegionClosure {
 147  private:
 148   ConcurrentMark* _cm;
 149   CMBitMap* _bitmap;
 150   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 151  public:
 152   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 153     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 154   }
 155 
 156   virtual bool doHeapRegion(HeapRegion* r) {
 157     size_t const chunk_size_in_words = M / HeapWordSize;
 158 
 159     HeapWord* cur = r->bottom();
 160     HeapWord* const end = r->end();
 161 
 162     while (cur < end) {
 163       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 164       _bitmap->clearRange(mr);
 165 
 166       cur += chunk_size_in_words;
 167 
 168       // Abort iteration if after yielding the marking has been aborted.
 169       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 170         return true;
 171       }
 172       // Repeat the asserts from before the start of the closure. We will do them
 173       // as asserts here to minimize their overhead on the product. However, we
 174       // will have them as guarantees at the beginning / end of the bitmap
 175       // clearing to get some checking in the product.
 176       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 177       assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
 178     }
 179 
 180     return false;
 181   }
 182 };
 183 
 184 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 185   ClearBitmapHRClosure* _cl;
 186   HeapRegionClaimer     _hrclaimer;
 187   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 188 
 189 public:
 190   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
 191       _cl(cl), _suspendible(suspendible), AbstractGangTask("Parallel Clear Bitmap Task"), _hrclaimer(n_workers) {}
 192 
 193   void work(uint worker_id) {
 194     if (_suspendible) {
 195       SuspendibleThreadSet::join();
 196     }
 197     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 198     if (_suspendible) {
 199       SuspendibleThreadSet::leave();
 200     }
 201   }
 202 };
 203 
 204 void CMBitMap::clearAll() {
 205   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 206   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 207   uint n_workers = g1h->workers()->active_workers();
 208   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 209   g1h->workers()->run_task(&task);
 210   guarantee(cl.complete(), "Must have completed iteration.");
 211   return;
 212 }
 213 
 214 void CMBitMap::markRange(MemRegion mr) {
 215   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 216   assert(!mr.is_empty(), "unexpected empty region");
 217   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 218           ((HeapWord *) mr.end())),
 219          "markRange memory region end is not card aligned");
 220   // convert address range into offset range
 221   _bm.at_put_range(heapWordToOffset(mr.start()),
 222                    heapWordToOffset(mr.end()), true);
 223 }
 224 
 225 void CMBitMap::clearRange(MemRegion mr) {
 226   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 227   assert(!mr.is_empty(), "unexpected empty region");
 228   // convert address range into offset range
 229   _bm.at_put_range(heapWordToOffset(mr.start()),
 230                    heapWordToOffset(mr.end()), false);
 231 }
 232 
 233 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 234                                             HeapWord* end_addr) {
 235   HeapWord* start = getNextMarkedWordAddress(addr);
 236   start = MIN2(start, end_addr);
 237   HeapWord* end   = getNextUnmarkedWordAddress(start);
 238   end = MIN2(end, end_addr);
 239   assert(start <= end, "Consistency check");
 240   MemRegion mr(start, end);
 241   if (!mr.is_empty()) {
 242     clearRange(mr);
 243   }
 244   return mr;
 245 }
 246 
 247 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 248   _base(NULL), _cm(cm)
 249 #ifdef ASSERT
 250   , _drain_in_progress(false)
 251   , _drain_in_progress_yields(false)
 252 #endif
 253 {}
 254 
 255 bool CMMarkStack::allocate(size_t capacity) {
 256   // allocate a stack of the requisite depth
 257   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 258   if (!rs.is_reserved()) {
 259     warning("ConcurrentMark MarkStack allocation failure");
 260     return false;
 261   }
 262   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 263   if (!_virtual_space.initialize(rs, rs.size())) {
 264     warning("ConcurrentMark MarkStack backing store failure");
 265     // Release the virtual memory reserved for the marking stack
 266     rs.release();
 267     return false;
 268   }
 269   assert(_virtual_space.committed_size() == rs.size(),
 270          "Didn't reserve backing store for all of ConcurrentMark stack?");
 271   _base = (oop*) _virtual_space.low();
 272   setEmpty();
 273   _capacity = (jint) capacity;
 274   _saved_index = -1;
 275   _should_expand = false;
 276   NOT_PRODUCT(_max_depth = 0);
 277   return true;
 278 }
 279 
 280 void CMMarkStack::expand() {
 281   // Called, during remark, if we've overflown the marking stack during marking.
 282   assert(isEmpty(), "stack should been emptied while handling overflow");
 283   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 284   // Clear expansion flag
 285   _should_expand = false;
 286   if (_capacity == (jint) MarkStackSizeMax) {
 287     if (PrintGCDetails && Verbose) {
 288       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 289     }
 290     return;
 291   }
 292   // Double capacity if possible
 293   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 294   // Do not give up existing stack until we have managed to
 295   // get the double capacity that we desired.
 296   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 297                                                            sizeof(oop)));
 298   if (rs.is_reserved()) {
 299     // Release the backing store associated with old stack
 300     _virtual_space.release();
 301     // Reinitialize virtual space for new stack
 302     if (!_virtual_space.initialize(rs, rs.size())) {
 303       fatal("Not enough swap for expanded marking stack capacity");
 304     }
 305     _base = (oop*)(_virtual_space.low());
 306     _index = 0;
 307     _capacity = new_capacity;
 308   } else {
 309     if (PrintGCDetails && Verbose) {
 310       // Failed to double capacity, continue;
 311       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 312                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 313                           _capacity / K, new_capacity / K);
 314     }
 315   }
 316 }
 317 
 318 void CMMarkStack::set_should_expand() {
 319   // If we're resetting the marking state because of an
 320   // marking stack overflow, record that we should, if
 321   // possible, expand the stack.
 322   _should_expand = _cm->has_overflown();
 323 }
 324 
 325 CMMarkStack::~CMMarkStack() {
 326   if (_base != NULL) {
 327     _base = NULL;
 328     _virtual_space.release();
 329   }
 330 }
 331 
 332 void CMMarkStack::par_push(oop ptr) {
 333   while (true) {
 334     if (isFull()) {
 335       _overflow = true;
 336       return;
 337     }
 338     // Otherwise...
 339     jint index = _index;
 340     jint next_index = index+1;
 341     jint res = Atomic::cmpxchg(next_index, &_index, index);
 342     if (res == index) {
 343       _base[index] = ptr;
 344       // Note that we don't maintain this atomically.  We could, but it
 345       // doesn't seem necessary.
 346       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 347       return;
 348     }
 349     // Otherwise, we need to try again.
 350   }
 351 }
 352 
 353 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 354   while (true) {
 355     if (isFull()) {
 356       _overflow = true;
 357       return;
 358     }
 359     // Otherwise...
 360     jint index = _index;
 361     jint next_index = index + n;
 362     if (next_index > _capacity) {
 363       _overflow = true;
 364       return;
 365     }
 366     jint res = Atomic::cmpxchg(next_index, &_index, index);
 367     if (res == index) {
 368       for (int i = 0; i < n; i++) {
 369         int  ind = index + i;
 370         assert(ind < _capacity, "By overflow test above.");
 371         _base[ind] = ptr_arr[i];
 372       }
 373       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 374       return;
 375     }
 376     // Otherwise, we need to try again.
 377   }
 378 }
 379 
 380 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 381   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 382   jint start = _index;
 383   jint next_index = start + n;
 384   if (next_index > _capacity) {
 385     _overflow = true;
 386     return;
 387   }
 388   // Otherwise.
 389   _index = next_index;
 390   for (int i = 0; i < n; i++) {
 391     int ind = start + i;
 392     assert(ind < _capacity, "By overflow test above.");
 393     _base[ind] = ptr_arr[i];
 394   }
 395   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 396 }
 397 
 398 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 399   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 400   jint index = _index;
 401   if (index == 0) {
 402     *n = 0;
 403     return false;
 404   } else {
 405     int k = MIN2(max, index);
 406     jint  new_ind = index - k;
 407     for (int j = 0; j < k; j++) {
 408       ptr_arr[j] = _base[new_ind + j];
 409     }
 410     _index = new_ind;
 411     *n = k;
 412     return true;
 413   }
 414 }
 415 
 416 template<class OopClosureClass>
 417 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 418   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 419          || SafepointSynchronize::is_at_safepoint(),
 420          "Drain recursion must be yield-safe.");
 421   bool res = true;
 422   debug_only(_drain_in_progress = true);
 423   debug_only(_drain_in_progress_yields = yield_after);
 424   while (!isEmpty()) {
 425     oop newOop = pop();
 426     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 427     assert(newOop->is_oop(), "Expected an oop");
 428     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 429            "only grey objects on this stack");
 430     newOop->oop_iterate(cl);
 431     if (yield_after && _cm->do_yield_check()) {
 432       res = false;
 433       break;
 434     }
 435   }
 436   debug_only(_drain_in_progress = false);
 437   return res;
 438 }
 439 
 440 void CMMarkStack::note_start_of_gc() {
 441   assert(_saved_index == -1,
 442          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 443   _saved_index = _index;
 444 }
 445 
 446 void CMMarkStack::note_end_of_gc() {
 447   // This is intentionally a guarantee, instead of an assert. If we
 448   // accidentally add something to the mark stack during GC, it
 449   // will be a correctness issue so it's better if we crash. we'll
 450   // only check this once per GC anyway, so it won't be a performance
 451   // issue in any way.
 452   guarantee(_saved_index == _index,
 453             err_msg("saved index: %d index: %d", _saved_index, _index));
 454   _saved_index = -1;
 455 }
 456 
 457 void CMMarkStack::oops_do(OopClosure* f) {
 458   assert(_saved_index == _index,
 459          err_msg("saved index: %d index: %d", _saved_index, _index));
 460   for (int i = 0; i < _index; i += 1) {
 461     f->do_oop(&_base[i]);
 462   }
 463 }
 464 
 465 CMRootRegions::CMRootRegions() :
 466   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 467   _should_abort(false),  _next_survivor(NULL) { }
 468 
 469 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 470   _young_list = g1h->young_list();
 471   _cm = cm;
 472 }
 473 
 474 void CMRootRegions::prepare_for_scan() {
 475   assert(!scan_in_progress(), "pre-condition");
 476 
 477   // Currently, only survivors can be root regions.
 478   assert(_next_survivor == NULL, "pre-condition");
 479   _next_survivor = _young_list->first_survivor_region();
 480   _scan_in_progress = (_next_survivor != NULL);
 481   _should_abort = false;
 482 }
 483 
 484 HeapRegion* CMRootRegions::claim_next() {
 485   if (_should_abort) {
 486     // If someone has set the should_abort flag, we return NULL to
 487     // force the caller to bail out of their loop.
 488     return NULL;
 489   }
 490 
 491   // Currently, only survivors can be root regions.
 492   HeapRegion* res = _next_survivor;
 493   if (res != NULL) {
 494     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 495     // Read it again in case it changed while we were waiting for the lock.
 496     res = _next_survivor;
 497     if (res != NULL) {
 498       if (res == _young_list->last_survivor_region()) {
 499         // We just claimed the last survivor so store NULL to indicate
 500         // that we're done.
 501         _next_survivor = NULL;
 502       } else {
 503         _next_survivor = res->get_next_young_region();
 504       }
 505     } else {
 506       // Someone else claimed the last survivor while we were trying
 507       // to take the lock so nothing else to do.
 508     }
 509   }
 510   assert(res == NULL || res->is_survivor(), "post-condition");
 511 
 512   return res;
 513 }
 514 
 515 void CMRootRegions::scan_finished() {
 516   assert(scan_in_progress(), "pre-condition");
 517 
 518   // Currently, only survivors can be root regions.
 519   if (!_should_abort) {
 520     assert(_next_survivor == NULL, "we should have claimed all survivors");
 521   }
 522   _next_survivor = NULL;
 523 
 524   {
 525     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 526     _scan_in_progress = false;
 527     RootRegionScan_lock->notify_all();
 528   }
 529 }
 530 
 531 bool CMRootRegions::wait_until_scan_finished() {
 532   if (!scan_in_progress()) return false;
 533 
 534   {
 535     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 536     while (scan_in_progress()) {
 537       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 538     }
 539   }
 540   return true;
 541 }
 542 
 543 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 544 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 545 #endif // _MSC_VER
 546 
 547 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 548   return MAX2((n_par_threads + 2) / 4, 1U);
 549 }
 550 
 551 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 552   _g1h(g1h),
 553   _markBitMap1(),
 554   _markBitMap2(),
 555   _parallel_marking_threads(0),
 556   _max_parallel_marking_threads(0),
 557   _sleep_factor(0.0),
 558   _marking_task_overhead(1.0),
 559   _cleanup_sleep_factor(0.0),
 560   _cleanup_task_overhead(1.0),
 561   _cleanup_list("Cleanup List"),
 562   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 563   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 564             CardTableModRefBS::card_shift,
 565             false /* in_resource_area*/),
 566 
 567   _prevMarkBitMap(&_markBitMap1),
 568   _nextMarkBitMap(&_markBitMap2),
 569 
 570   _markStack(this),
 571   // _finger set in set_non_marking_state
 572 
 573   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
 574   // _active_tasks set in set_non_marking_state
 575   // _tasks set inside the constructor
 576   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 577   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 578 
 579   _has_overflown(false),
 580   _concurrent(false),
 581   _has_aborted(false),
 582   _aborted_gc_id(GCId::undefined()),
 583   _restart_for_overflow(false),
 584   _concurrent_marking_in_progress(false),
 585 
 586   // _verbose_level set below
 587 
 588   _init_times(),
 589   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 590   _cleanup_times(),
 591   _total_counting_time(0.0),
 592   _total_rs_scrub_time(0.0),
 593 
 594   _parallel_workers(NULL),
 595 
 596   _count_card_bitmaps(NULL),
 597   _count_marked_bytes(NULL),
 598   _completed_initialization(false) {
 599   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 600   if (verbose_level < no_verbose) {
 601     verbose_level = no_verbose;
 602   }
 603   if (verbose_level > high_verbose) {
 604     verbose_level = high_verbose;
 605   }
 606   _verbose_level = verbose_level;
 607 
 608   if (verbose_low()) {
 609     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 610                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 611   }
 612 
 613   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 614   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 615 
 616   // Create & start a ConcurrentMark thread.
 617   _cmThread = new ConcurrentMarkThread(this);
 618   assert(cmThread() != NULL, "CM Thread should have been created");
 619   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 620   if (_cmThread->osthread() == NULL) {
 621       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 622   }
 623 
 624   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 625   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 626   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 627 
 628   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 629   satb_qs.set_buffer_size(G1SATBBufferSize);
 630 
 631   _root_regions.init(_g1h, this);
 632 
 633   if (ConcGCThreads > ParallelGCThreads) {
 634     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 635             "than ParallelGCThreads (" UINTX_FORMAT ").",
 636             ConcGCThreads, ParallelGCThreads);
 637     return;
 638   }
 639   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 640     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 641     // if both are set
 642     _sleep_factor             = 0.0;
 643     _marking_task_overhead    = 1.0;
 644   } else if (G1MarkingOverheadPercent > 0) {
 645     // We will calculate the number of parallel marking threads based
 646     // on a target overhead with respect to the soft real-time goal
 647     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 648     double overall_cm_overhead =
 649       (double) MaxGCPauseMillis * marking_overhead /
 650       (double) GCPauseIntervalMillis;
 651     double cpu_ratio = 1.0 / (double) os::processor_count();
 652     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 653     double marking_task_overhead =
 654       overall_cm_overhead / marking_thread_num *
 655                                               (double) os::processor_count();
 656     double sleep_factor =
 657                        (1.0 - marking_task_overhead) / marking_task_overhead;
 658 
 659     FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 660     _sleep_factor             = sleep_factor;
 661     _marking_task_overhead    = marking_task_overhead;
 662   } else {
 663     // Calculate the number of parallel marking threads by scaling
 664     // the number of parallel GC threads.
 665     uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 666     FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 667     _sleep_factor             = 0.0;
 668     _marking_task_overhead    = 1.0;
 669   }
 670 
 671   assert(ConcGCThreads > 0, "Should have been set");
 672   _parallel_marking_threads = (uint) ConcGCThreads;
 673   _max_parallel_marking_threads = _parallel_marking_threads;
 674 
 675   if (parallel_marking_threads() > 1) {
 676     _cleanup_task_overhead = 1.0;
 677   } else {
 678     _cleanup_task_overhead = marking_task_overhead();
 679   }
 680   _cleanup_sleep_factor =
 681                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 682 
 683 #if 0
 684   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 685   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 686   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 687   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 688   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 689 #endif
 690 
 691   _parallel_workers = new FlexibleWorkGang("G1 Marker",
 692        _max_parallel_marking_threads, false, true);
 693   if (_parallel_workers == NULL) {
 694     vm_exit_during_initialization("Failed necessary allocation.");
 695   } else {
 696     _parallel_workers->initialize_workers();
 697   }
 698 
 699   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 700     size_t mark_stack_size =
 701       MIN2(MarkStackSizeMax,
 702           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 703     // Verify that the calculated value for MarkStackSize is in range.
 704     // It would be nice to use the private utility routine from Arguments.
 705     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 706       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 707               "must be between 1 and " SIZE_FORMAT,
 708               mark_stack_size, MarkStackSizeMax);
 709       return;
 710     }
 711     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 712   } else {
 713     // Verify MarkStackSize is in range.
 714     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 715       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 716         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 717           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 718                   "must be between 1 and " SIZE_FORMAT,
 719                   MarkStackSize, MarkStackSizeMax);
 720           return;
 721         }
 722       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 723         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 724           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 725                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 726                   MarkStackSize, MarkStackSizeMax);
 727           return;
 728         }
 729       }
 730     }
 731   }
 732 
 733   if (!_markStack.allocate(MarkStackSize)) {
 734     warning("Failed to allocate CM marking stack");
 735     return;
 736   }
 737 
 738   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 739   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 740 
 741   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 742   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 743 
 744   BitMap::idx_t card_bm_size = _card_bm.size();
 745 
 746   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 747   _active_tasks = _max_worker_id;
 748 
 749   uint max_regions = _g1h->max_regions();
 750   for (uint i = 0; i < _max_worker_id; ++i) {
 751     CMTaskQueue* task_queue = new CMTaskQueue();
 752     task_queue->initialize();
 753     _task_queues->register_queue(i, task_queue);
 754 
 755     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 756     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 757 
 758     _tasks[i] = new CMTask(i, this,
 759                            _count_marked_bytes[i],
 760                            &_count_card_bitmaps[i],
 761                            task_queue, _task_queues);
 762 
 763     _accum_task_vtime[i] = 0.0;
 764   }
 765 
 766   // Calculate the card number for the bottom of the heap. Used
 767   // in biasing indexes into the accounting card bitmaps.
 768   _heap_bottom_card_num =
 769     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 770                                 CardTableModRefBS::card_shift);
 771 
 772   // Clear all the liveness counting data
 773   clear_all_count_data();
 774 
 775   // so that the call below can read a sensible value
 776   _heap_start = g1h->reserved_region().start();
 777   set_non_marking_state();
 778   _completed_initialization = true;
 779 }
 780 
 781 void ConcurrentMark::reset() {
 782   // Starting values for these two. This should be called in a STW
 783   // phase.
 784   MemRegion reserved = _g1h->g1_reserved();
 785   _heap_start = reserved.start();
 786   _heap_end   = reserved.end();
 787 
 788   // Separated the asserts so that we know which one fires.
 789   assert(_heap_start != NULL, "heap bounds should look ok");
 790   assert(_heap_end != NULL, "heap bounds should look ok");
 791   assert(_heap_start < _heap_end, "heap bounds should look ok");
 792 
 793   // Reset all the marking data structures and any necessary flags
 794   reset_marking_state();
 795 
 796   if (verbose_low()) {
 797     gclog_or_tty->print_cr("[global] resetting");
 798   }
 799 
 800   // We do reset all of them, since different phases will use
 801   // different number of active threads. So, it's easiest to have all
 802   // of them ready.
 803   for (uint i = 0; i < _max_worker_id; ++i) {
 804     _tasks[i]->reset(_nextMarkBitMap);
 805   }
 806 
 807   // we need this to make sure that the flag is on during the evac
 808   // pause with initial mark piggy-backed
 809   set_concurrent_marking_in_progress();
 810 }
 811 
 812 
 813 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 814   _markStack.set_should_expand();
 815   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 816   if (clear_overflow) {
 817     clear_has_overflown();
 818   } else {
 819     assert(has_overflown(), "pre-condition");
 820   }
 821   _finger = _heap_start;
 822 
 823   for (uint i = 0; i < _max_worker_id; ++i) {
 824     CMTaskQueue* queue = _task_queues->queue(i);
 825     queue->set_empty();
 826   }
 827 }
 828 
 829 void ConcurrentMark::set_concurrency(uint active_tasks) {
 830   assert(active_tasks <= _max_worker_id, "we should not have more");
 831 
 832   _active_tasks = active_tasks;
 833   // Need to update the three data structures below according to the
 834   // number of active threads for this phase.
 835   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 836   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 837   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 838 }
 839 
 840 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 841   set_concurrency(active_tasks);
 842 
 843   _concurrent = concurrent;
 844   // We propagate this to all tasks, not just the active ones.
 845   for (uint i = 0; i < _max_worker_id; ++i)
 846     _tasks[i]->set_concurrent(concurrent);
 847 
 848   if (concurrent) {
 849     set_concurrent_marking_in_progress();
 850   } else {
 851     // We currently assume that the concurrent flag has been set to
 852     // false before we start remark. At this point we should also be
 853     // in a STW phase.
 854     assert(!concurrent_marking_in_progress(), "invariant");
 855     assert(out_of_regions(),
 856            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
 857                    p2i(_finger), p2i(_heap_end)));
 858   }
 859 }
 860 
 861 void ConcurrentMark::set_non_marking_state() {
 862   // We set the global marking state to some default values when we're
 863   // not doing marking.
 864   reset_marking_state();
 865   _active_tasks = 0;
 866   clear_concurrent_marking_in_progress();
 867 }
 868 
 869 ConcurrentMark::~ConcurrentMark() {
 870   // The ConcurrentMark instance is never freed.
 871   ShouldNotReachHere();
 872 }
 873 
 874 void ConcurrentMark::clearNextBitmap() {
 875   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 876 
 877   // Make sure that the concurrent mark thread looks to still be in
 878   // the current cycle.
 879   guarantee(cmThread()->during_cycle(), "invariant");
 880 
 881   // We are finishing up the current cycle by clearing the next
 882   // marking bitmap and getting it ready for the next cycle. During
 883   // this time no other cycle can start. So, let's make sure that this
 884   // is the case.
 885   guarantee(!g1h->mark_in_progress(), "invariant");
 886 
 887   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 888   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 889   _parallel_workers->run_task(&task);
 890 
 891   // Clear the liveness counting data. If the marking has been aborted, the abort()
 892   // call already did that.
 893   if (cl.complete()) {
 894     clear_all_count_data();
 895   }
 896 
 897   // Repeat the asserts from above.
 898   guarantee(cmThread()->during_cycle(), "invariant");
 899   guarantee(!g1h->mark_in_progress(), "invariant");
 900 }
 901 
 902 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 903   CMBitMap* _bitmap;
 904   bool _error;
 905  public:
 906   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 907   }
 908 
 909   virtual bool doHeapRegion(HeapRegion* r) {
 910     // This closure can be called concurrently to the mutator, so we must make sure
 911     // that the result of the getNextMarkedWordAddress() call is compared to the
 912     // value passed to it as limit to detect any found bits.
 913     // We can use the region's orig_end() for the limit and the comparison value
 914     // as it always contains the "real" end of the region that never changes and
 915     // has no side effects.
 916     // Due to the latter, there can also be no problem with the compiler generating
 917     // reloads of the orig_end() call.
 918     HeapWord* end = r->orig_end();
 919     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 920   }
 921 };
 922 
 923 bool ConcurrentMark::nextMarkBitmapIsClear() {
 924   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 925   _g1h->heap_region_iterate(&cl);
 926   return cl.complete();
 927 }
 928 
 929 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 930 public:
 931   bool doHeapRegion(HeapRegion* r) {
 932     if (!r->is_continues_humongous()) {
 933       r->note_start_of_marking();
 934     }
 935     return false;
 936   }
 937 };
 938 
 939 void ConcurrentMark::checkpointRootsInitialPre() {
 940   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 941   G1CollectorPolicy* g1p = g1h->g1_policy();
 942 
 943   _has_aborted = false;
 944 
 945 #ifndef PRODUCT
 946   if (G1PrintReachableAtInitialMark) {
 947     print_reachable("at-cycle-start",
 948                     VerifyOption_G1UsePrevMarking, true /* all */);
 949   }
 950 #endif
 951 
 952   // Initialize marking structures. This has to be done in a STW phase.
 953   reset();
 954 
 955   // For each region note start of marking.
 956   NoteStartOfMarkHRClosure startcl;
 957   g1h->heap_region_iterate(&startcl);
 958 }
 959 
 960 
 961 void ConcurrentMark::checkpointRootsInitialPost() {
 962   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 963 
 964   // If we force an overflow during remark, the remark operation will
 965   // actually abort and we'll restart concurrent marking. If we always
 966   // force an overflow during remark we'll never actually complete the
 967   // marking phase. So, we initialize this here, at the start of the
 968   // cycle, so that at the remaining overflow number will decrease at
 969   // every remark and we'll eventually not need to cause one.
 970   force_overflow_stw()->init();
 971 
 972   // Start Concurrent Marking weak-reference discovery.
 973   ReferenceProcessor* rp = g1h->ref_processor_cm();
 974   // enable ("weak") refs discovery
 975   rp->enable_discovery();
 976   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 977 
 978   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 979   // This is the start of  the marking cycle, we're expected all
 980   // threads to have SATB queues with active set to false.
 981   satb_mq_set.set_active_all_threads(true, /* new active value */
 982                                      false /* expected_active */);
 983 
 984   _root_regions.prepare_for_scan();
 985 
 986   // update_g1_committed() will be called at the end of an evac pause
 987   // when marking is on. So, it's also called at the end of the
 988   // initial-mark pause to update the heap end, if the heap expands
 989   // during it. No need to call it here.
 990 }
 991 
 992 /*
 993  * Notice that in the next two methods, we actually leave the STS
 994  * during the barrier sync and join it immediately afterwards. If we
 995  * do not do this, the following deadlock can occur: one thread could
 996  * be in the barrier sync code, waiting for the other thread to also
 997  * sync up, whereas another one could be trying to yield, while also
 998  * waiting for the other threads to sync up too.
 999  *
1000  * Note, however, that this code is also used during remark and in
1001  * this case we should not attempt to leave / enter the STS, otherwise
1002  * we'll either hit an assert (debug / fastdebug) or deadlock
1003  * (product). So we should only leave / enter the STS if we are
1004  * operating concurrently.
1005  *
1006  * Because the thread that does the sync barrier has left the STS, it
1007  * is possible to be suspended for a Full GC or an evacuation pause
1008  * could occur. This is actually safe, since the entering the sync
1009  * barrier is one of the last things do_marking_step() does, and it
1010  * doesn't manipulate any data structures afterwards.
1011  */
1012 
1013 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
1014   if (verbose_low()) {
1015     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
1016   }
1017 
1018   if (concurrent()) {
1019     SuspendibleThreadSet::leave();
1020   }
1021 
1022   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
1023 
1024   if (concurrent()) {
1025     SuspendibleThreadSet::join();
1026   }
1027   // at this point everyone should have synced up and not be doing any
1028   // more work
1029 
1030   if (verbose_low()) {
1031     if (barrier_aborted) {
1032       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
1033     } else {
1034       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
1035     }
1036   }
1037 
1038   if (barrier_aborted) {
1039     // If the barrier aborted we ignore the overflow condition and
1040     // just abort the whole marking phase as quickly as possible.
1041     return;
1042   }
1043 
1044   // If we're executing the concurrent phase of marking, reset the marking
1045   // state; otherwise the marking state is reset after reference processing,
1046   // during the remark pause.
1047   // If we reset here as a result of an overflow during the remark we will
1048   // see assertion failures from any subsequent set_concurrency_and_phase()
1049   // calls.
1050   if (concurrent()) {
1051     // let the task associated with with worker 0 do this
1052     if (worker_id == 0) {
1053       // task 0 is responsible for clearing the global data structures
1054       // We should be here because of an overflow. During STW we should
1055       // not clear the overflow flag since we rely on it being true when
1056       // we exit this method to abort the pause and restart concurrent
1057       // marking.
1058       reset_marking_state(true /* clear_overflow */);
1059       force_overflow()->update();
1060 
1061       if (G1Log::fine()) {
1062         gclog_or_tty->gclog_stamp(concurrent_gc_id());
1063         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1064       }
1065     }
1066   }
1067 
1068   // after this, each task should reset its own data structures then
1069   // then go into the second barrier
1070 }
1071 
1072 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1073   if (verbose_low()) {
1074     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1075   }
1076 
1077   if (concurrent()) {
1078     SuspendibleThreadSet::leave();
1079   }
1080 
1081   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1082 
1083   if (concurrent()) {
1084     SuspendibleThreadSet::join();
1085   }
1086   // at this point everything should be re-initialized and ready to go
1087 
1088   if (verbose_low()) {
1089     if (barrier_aborted) {
1090       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1091     } else {
1092       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1093     }
1094   }
1095 }
1096 
1097 #ifndef PRODUCT
1098 void ForceOverflowSettings::init() {
1099   _num_remaining = G1ConcMarkForceOverflow;
1100   _force = false;
1101   update();
1102 }
1103 
1104 void ForceOverflowSettings::update() {
1105   if (_num_remaining > 0) {
1106     _num_remaining -= 1;
1107     _force = true;
1108   } else {
1109     _force = false;
1110   }
1111 }
1112 
1113 bool ForceOverflowSettings::should_force() {
1114   if (_force) {
1115     _force = false;
1116     return true;
1117   } else {
1118     return false;
1119   }
1120 }
1121 #endif // !PRODUCT
1122 
1123 class CMConcurrentMarkingTask: public AbstractGangTask {
1124 private:
1125   ConcurrentMark*       _cm;
1126   ConcurrentMarkThread* _cmt;
1127 
1128 public:
1129   void work(uint worker_id) {
1130     assert(Thread::current()->is_ConcurrentGC_thread(),
1131            "this should only be done by a conc GC thread");
1132     ResourceMark rm;
1133 
1134     double start_vtime = os::elapsedVTime();
1135 
1136     SuspendibleThreadSet::join();
1137 
1138     assert(worker_id < _cm->active_tasks(), "invariant");
1139     CMTask* the_task = _cm->task(worker_id);
1140     the_task->record_start_time();
1141     if (!_cm->has_aborted()) {
1142       do {
1143         double start_vtime_sec = os::elapsedVTime();
1144         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1145 
1146         the_task->do_marking_step(mark_step_duration_ms,
1147                                   true  /* do_termination */,
1148                                   false /* is_serial*/);
1149 
1150         double end_vtime_sec = os::elapsedVTime();
1151         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1152         _cm->clear_has_overflown();
1153 
1154         _cm->do_yield_check(worker_id);
1155 
1156         jlong sleep_time_ms;
1157         if (!_cm->has_aborted() && the_task->has_aborted()) {
1158           sleep_time_ms =
1159             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1160           SuspendibleThreadSet::leave();
1161           os::sleep(Thread::current(), sleep_time_ms, false);
1162           SuspendibleThreadSet::join();
1163         }
1164       } while (!_cm->has_aborted() && the_task->has_aborted());
1165     }
1166     the_task->record_end_time();
1167     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1168 
1169     SuspendibleThreadSet::leave();
1170 
1171     double end_vtime = os::elapsedVTime();
1172     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1173   }
1174 
1175   CMConcurrentMarkingTask(ConcurrentMark* cm,
1176                           ConcurrentMarkThread* cmt) :
1177       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1178 
1179   ~CMConcurrentMarkingTask() { }
1180 };
1181 
1182 // Calculates the number of active workers for a concurrent
1183 // phase.
1184 uint ConcurrentMark::calc_parallel_marking_threads() {
1185   uint n_conc_workers = 0;
1186   if (!UseDynamicNumberOfGCThreads ||
1187       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1188        !ForceDynamicNumberOfGCThreads)) {
1189     n_conc_workers = max_parallel_marking_threads();
1190   } else {
1191     n_conc_workers =
1192       AdaptiveSizePolicy::calc_default_active_workers(
1193                                    max_parallel_marking_threads(),
1194                                    1, /* Minimum workers */
1195                                    parallel_marking_threads(),
1196                                    Threads::number_of_non_daemon_threads());
1197     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1198     // that scaling has already gone into "_max_parallel_marking_threads".
1199   }
1200   assert(n_conc_workers > 0, "Always need at least 1");
1201   return n_conc_workers;
1202 }
1203 
1204 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1205   // Currently, only survivors can be root regions.
1206   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1207   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1208 
1209   const uintx interval = PrefetchScanIntervalInBytes;
1210   HeapWord* curr = hr->bottom();
1211   const HeapWord* end = hr->top();
1212   while (curr < end) {
1213     Prefetch::read(curr, interval);
1214     oop obj = oop(curr);
1215     int size = obj->oop_iterate(&cl);
1216     assert(size == obj->size(), "sanity");
1217     curr += size;
1218   }
1219 }
1220 
1221 class CMRootRegionScanTask : public AbstractGangTask {
1222 private:
1223   ConcurrentMark* _cm;
1224 
1225 public:
1226   CMRootRegionScanTask(ConcurrentMark* cm) :
1227     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1228 
1229   void work(uint worker_id) {
1230     assert(Thread::current()->is_ConcurrentGC_thread(),
1231            "this should only be done by a conc GC thread");
1232 
1233     CMRootRegions* root_regions = _cm->root_regions();
1234     HeapRegion* hr = root_regions->claim_next();
1235     while (hr != NULL) {
1236       _cm->scanRootRegion(hr, worker_id);
1237       hr = root_regions->claim_next();
1238     }
1239   }
1240 };
1241 
1242 void ConcurrentMark::scanRootRegions() {
1243   // Start of concurrent marking.
1244   ClassLoaderDataGraph::clear_claimed_marks();
1245 
1246   // scan_in_progress() will have been set to true only if there was
1247   // at least one root region to scan. So, if it's false, we
1248   // should not attempt to do any further work.
1249   if (root_regions()->scan_in_progress()) {
1250     _parallel_marking_threads = calc_parallel_marking_threads();
1251     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1252            "Maximum number of marking threads exceeded");
1253     uint active_workers = MAX2(1U, parallel_marking_threads());
1254 
1255     CMRootRegionScanTask task(this);
1256     _parallel_workers->set_active_workers(active_workers);
1257     _parallel_workers->run_task(&task);
1258 
1259     // It's possible that has_aborted() is true here without actually
1260     // aborting the survivor scan earlier. This is OK as it's
1261     // mainly used for sanity checking.
1262     root_regions()->scan_finished();
1263   }
1264 }
1265 
1266 void ConcurrentMark::markFromRoots() {
1267   // we might be tempted to assert that:
1268   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1269   //        "inconsistent argument?");
1270   // However that wouldn't be right, because it's possible that
1271   // a safepoint is indeed in progress as a younger generation
1272   // stop-the-world GC happens even as we mark in this generation.
1273 
1274   _restart_for_overflow = false;
1275   force_overflow_conc()->init();
1276 
1277   // _g1h has _n_par_threads
1278   _parallel_marking_threads = calc_parallel_marking_threads();
1279   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1280     "Maximum number of marking threads exceeded");
1281 
1282   uint active_workers = MAX2(1U, parallel_marking_threads());
1283 
1284   // Parallel task terminator is set in "set_concurrency_and_phase()"
1285   set_concurrency_and_phase(active_workers, true /* concurrent */);
1286 
1287   CMConcurrentMarkingTask markingTask(this, cmThread());
1288   _parallel_workers->set_active_workers(active_workers);
1289   // Don't set _n_par_threads because it affects MT in process_roots()
1290   // and the decisions on that MT processing is made elsewhere.
1291   assert(_parallel_workers->active_workers() > 0, "Should have been set");
1292   _parallel_workers->run_task(&markingTask);
1293   print_stats();
1294 }
1295 
1296 // Helper class to get rid of some boilerplate code.
1297 class G1CMTraceTime : public GCTraceTime {
1298   static bool doit_and_prepend(bool doit) {
1299     if (doit) {
1300       gclog_or_tty->put(' ');
1301     }
1302     return doit;
1303   }
1304 
1305  public:
1306   G1CMTraceTime(const char* title, bool doit)
1307     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
1308         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
1309   }
1310 };
1311 
1312 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1313   // world is stopped at this checkpoint
1314   assert(SafepointSynchronize::is_at_safepoint(),
1315          "world should be stopped");
1316 
1317   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1318 
1319   // If a full collection has happened, we shouldn't do this.
1320   if (has_aborted()) {
1321     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1322     return;
1323   }
1324 
1325   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1326 
1327   if (VerifyDuringGC) {
1328     HandleMark hm;  // handle scope
1329     Universe::heap()->prepare_for_verify();
1330     Universe::verify(VerifyOption_G1UsePrevMarking,
1331                      " VerifyDuringGC:(before)");
1332   }
1333   g1h->check_bitmaps("Remark Start");
1334 
1335   G1CollectorPolicy* g1p = g1h->g1_policy();
1336   g1p->record_concurrent_mark_remark_start();
1337 
1338   double start = os::elapsedTime();
1339 
1340   checkpointRootsFinalWork();
1341 
1342   double mark_work_end = os::elapsedTime();
1343 
1344   weakRefsWork(clear_all_soft_refs);
1345 
1346   if (has_overflown()) {
1347     // Oops.  We overflowed.  Restart concurrent marking.
1348     _restart_for_overflow = true;
1349     if (G1TraceMarkStackOverflow) {
1350       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1351     }
1352 
1353     // Verify the heap w.r.t. the previous marking bitmap.
1354     if (VerifyDuringGC) {
1355       HandleMark hm;  // handle scope
1356       Universe::heap()->prepare_for_verify();
1357       Universe::verify(VerifyOption_G1UsePrevMarking,
1358                        " VerifyDuringGC:(overflow)");
1359     }
1360 
1361     // Clear the marking state because we will be restarting
1362     // marking due to overflowing the global mark stack.
1363     reset_marking_state();
1364   } else {
1365     {
1366       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1367 
1368       // Aggregate the per-task counting data that we have accumulated
1369       // while marking.
1370       aggregate_count_data();
1371     }
1372 
1373     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1374     // We're done with marking.
1375     // This is the end of  the marking cycle, we're expected all
1376     // threads to have SATB queues with active set to true.
1377     satb_mq_set.set_active_all_threads(false, /* new active value */
1378                                        true /* expected_active */);
1379 
1380     if (VerifyDuringGC) {
1381       HandleMark hm;  // handle scope
1382       Universe::heap()->prepare_for_verify();
1383       Universe::verify(VerifyOption_G1UseNextMarking,
1384                        " VerifyDuringGC:(after)");
1385     }
1386     g1h->check_bitmaps("Remark End");
1387     assert(!restart_for_overflow(), "sanity");
1388     // Completely reset the marking state since marking completed
1389     set_non_marking_state();
1390   }
1391 
1392   // Expand the marking stack, if we have to and if we can.
1393   if (_markStack.should_expand()) {
1394     _markStack.expand();
1395   }
1396 
1397   // Statistics
1398   double now = os::elapsedTime();
1399   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1400   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1401   _remark_times.add((now - start) * 1000.0);
1402 
1403   g1p->record_concurrent_mark_remark_end();
1404 
1405   G1CMIsAliveClosure is_alive(g1h);
1406   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1407 }
1408 
1409 // Base class of the closures that finalize and verify the
1410 // liveness counting data.
1411 class CMCountDataClosureBase: public HeapRegionClosure {
1412 protected:
1413   G1CollectedHeap* _g1h;
1414   ConcurrentMark* _cm;
1415   CardTableModRefBS* _ct_bs;
1416 
1417   BitMap* _region_bm;
1418   BitMap* _card_bm;
1419 
1420   // Takes a region that's not empty (i.e., it has at least one
1421   // live object in it and sets its corresponding bit on the region
1422   // bitmap to 1. If the region is "starts humongous" it will also set
1423   // to 1 the bits on the region bitmap that correspond to its
1424   // associated "continues humongous" regions.
1425   void set_bit_for_region(HeapRegion* hr) {
1426     assert(!hr->is_continues_humongous(), "should have filtered those out");
1427 
1428     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1429     if (!hr->is_starts_humongous()) {
1430       // Normal (non-humongous) case: just set the bit.
1431       _region_bm->par_at_put(index, true);
1432     } else {
1433       // Starts humongous case: calculate how many regions are part of
1434       // this humongous region and then set the bit range.
1435       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1436       _region_bm->par_at_put_range(index, end_index, true);
1437     }
1438   }
1439 
1440 public:
1441   CMCountDataClosureBase(G1CollectedHeap* g1h,
1442                          BitMap* region_bm, BitMap* card_bm):
1443     _g1h(g1h), _cm(g1h->concurrent_mark()),
1444     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1445     _region_bm(region_bm), _card_bm(card_bm) { }
1446 };
1447 
1448 // Closure that calculates the amount of live data per region. Used
1449 // for verification purposes during the cleanup pause.
1450 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1451   CMBitMapRO* _bm;
1452   size_t _region_marked_bytes;
1453 
1454 public:
1455   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1456                          BitMap* region_bm, BitMap* card_bm) :
1457     CMCountDataClosureBase(g1h, region_bm, card_bm),
1458     _bm(bm), _region_marked_bytes(0) { }
1459 
1460   bool doHeapRegion(HeapRegion* hr) {
1461 
1462     if (hr->is_continues_humongous()) {
1463       // We will ignore these here and process them when their
1464       // associated "starts humongous" region is processed (see
1465       // set_bit_for_region()). Note that we cannot rely on their
1466       // associated "starts humongous" region to have its bit set to
1467       // 1 since, due to the region chunking in the parallel region
1468       // iteration, a "continues humongous" region might be visited
1469       // before its associated "starts humongous" region.
1470       return false;
1471     }
1472 
1473     HeapWord* ntams = hr->next_top_at_mark_start();
1474     HeapWord* start = hr->bottom();
1475 
1476     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1477            err_msg("Preconditions not met - "
1478                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1479                    p2i(start), p2i(ntams), p2i(hr->end())));
1480 
1481     // Find the first marked object at or after "start".
1482     start = _bm->getNextMarkedWordAddress(start, ntams);
1483 
1484     size_t marked_bytes = 0;
1485 
1486     while (start < ntams) {
1487       oop obj = oop(start);
1488       int obj_sz = obj->size();
1489       HeapWord* obj_end = start + obj_sz;
1490 
1491       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1492       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1493 
1494       // Note: if we're looking at the last region in the heap, obj_end
1495       // could actually be just beyond the end of the heap; end_idx
1496       // will then correspond to a (non-existent) card that is also
1497       // just beyond the heap.
1498       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1499         // end of object is not card aligned - increment to cover
1500         // all the cards spanned by the object
1501         end_idx += 1;
1502       }
1503 
1504       // Set the bits in the card BM for the cards spanned by this object.
1505       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1506 
1507       // Add the size of this object to the number of marked bytes.
1508       marked_bytes += (size_t)obj_sz * HeapWordSize;
1509 
1510       // Find the next marked object after this one.
1511       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1512     }
1513 
1514     // Mark the allocated-since-marking portion...
1515     HeapWord* top = hr->top();
1516     if (ntams < top) {
1517       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1518       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1519 
1520       // Note: if we're looking at the last region in the heap, top
1521       // could actually be just beyond the end of the heap; end_idx
1522       // will then correspond to a (non-existent) card that is also
1523       // just beyond the heap.
1524       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1525         // top is not card aligned - increment to cover
1526         // all the cards spanned by [ntams, top)
1527         end_idx += 1;
1528       }
1529       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1530 
1531       // This definitely means the region has live objects.
1532       set_bit_for_region(hr);
1533     }
1534 
1535     // Update the live region bitmap.
1536     if (marked_bytes > 0) {
1537       set_bit_for_region(hr);
1538     }
1539 
1540     // Set the marked bytes for the current region so that
1541     // it can be queried by a calling verification routine
1542     _region_marked_bytes = marked_bytes;
1543 
1544     return false;
1545   }
1546 
1547   size_t region_marked_bytes() const { return _region_marked_bytes; }
1548 };
1549 
1550 // Heap region closure used for verifying the counting data
1551 // that was accumulated concurrently and aggregated during
1552 // the remark pause. This closure is applied to the heap
1553 // regions during the STW cleanup pause.
1554 
1555 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1556   G1CollectedHeap* _g1h;
1557   ConcurrentMark* _cm;
1558   CalcLiveObjectsClosure _calc_cl;
1559   BitMap* _region_bm;   // Region BM to be verified
1560   BitMap* _card_bm;     // Card BM to be verified
1561   bool _verbose;        // verbose output?
1562 
1563   BitMap* _exp_region_bm; // Expected Region BM values
1564   BitMap* _exp_card_bm;   // Expected card BM values
1565 
1566   int _failures;
1567 
1568 public:
1569   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1570                                 BitMap* region_bm,
1571                                 BitMap* card_bm,
1572                                 BitMap* exp_region_bm,
1573                                 BitMap* exp_card_bm,
1574                                 bool verbose) :
1575     _g1h(g1h), _cm(g1h->concurrent_mark()),
1576     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1577     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1578     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1579     _failures(0) { }
1580 
1581   int failures() const { return _failures; }
1582 
1583   bool doHeapRegion(HeapRegion* hr) {
1584     if (hr->is_continues_humongous()) {
1585       // We will ignore these here and process them when their
1586       // associated "starts humongous" region is processed (see
1587       // set_bit_for_region()). Note that we cannot rely on their
1588       // associated "starts humongous" region to have its bit set to
1589       // 1 since, due to the region chunking in the parallel region
1590       // iteration, a "continues humongous" region might be visited
1591       // before its associated "starts humongous" region.
1592       return false;
1593     }
1594 
1595     int failures = 0;
1596 
1597     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1598     // this region and set the corresponding bits in the expected region
1599     // and card bitmaps.
1600     bool res = _calc_cl.doHeapRegion(hr);
1601     assert(res == false, "should be continuing");
1602 
1603     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1604                     Mutex::_no_safepoint_check_flag);
1605 
1606     // Verify the marked bytes for this region.
1607     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1608     size_t act_marked_bytes = hr->next_marked_bytes();
1609 
1610     // We're not OK if expected marked bytes > actual marked bytes. It means
1611     // we have failed to account for some objects during the actual marking.
1612     if (exp_marked_bytes > act_marked_bytes) {
1613       if (_verbose) {
1614         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1615                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1616                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1617       }
1618       failures += 1;
1619     }
1620 
1621     // Verify the bit, for this region, in the actual and expected
1622     // (which was just calculated) region bit maps.
1623     // We're not OK if the bit in the calculated expected region
1624     // bitmap is set and the bit in the actual region bitmap is not.
1625     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1626 
1627     bool expected = _exp_region_bm->at(index);
1628     bool actual = _region_bm->at(index);
1629     if (expected && !actual) {
1630       if (_verbose) {
1631         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1632                                "expected: %s, actual: %s",
1633                                hr->hrm_index(),
1634                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1635       }
1636       failures += 1;
1637     }
1638 
1639     // Verify that the card bit maps for the cards spanned by the current
1640     // region match. We have an error if we have a set bit in the expected
1641     // bit map and the corresponding bit in the actual bitmap is not set.
1642 
1643     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1644     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1645 
1646     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1647       expected = _exp_card_bm->at(i);
1648       actual = _card_bm->at(i);
1649 
1650       if (expected && !actual) {
1651         if (_verbose) {
1652           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1653                                  "expected: %s, actual: %s",
1654                                  hr->hrm_index(), i,
1655                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1656         }
1657         failures += 1;
1658       }
1659     }
1660 
1661     if (failures > 0 && _verbose)  {
1662       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1663                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1664                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1665                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1666     }
1667 
1668     _failures += failures;
1669 
1670     // We could stop iteration over the heap when we
1671     // find the first violating region by returning true.
1672     return false;
1673   }
1674 };
1675 
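     // Gang task used, when VerifyDuringGC is enabled, to recompute the
     // liveness counting data from the marking bitmap and compare it with
     // the data that was accumulated during marking (using
     // VerifyLiveObjectDataHRClosure above).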
1676 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1677 protected:
1678   G1CollectedHeap* _g1h;
1679   ConcurrentMark* _cm;
1680   BitMap* _actual_region_bm;
1681   BitMap* _actual_card_bm;
1682 
1683   uint    _n_workers;
1684 
1685   BitMap* _expected_region_bm;
1686   BitMap* _expected_card_bm;
1687 
1688   int  _failures;
1689   bool _verbose;
1690 
1691   HeapRegionClaimer _hrclaimer;
1692 
1693 public:
1694   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1695                             BitMap* region_bm, BitMap* card_bm,
1696                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1697     : AbstractGangTask("G1 verify final counting"),
1698       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1699       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1700       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1701       _failures(0), _verbose(false),
1702       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1703     assert(VerifyDuringGC, "don't call this otherwise");
1704     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1705     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1706 
1707     _verbose = _cm->verbose_medium();
1708   }
1709 
1710   void work(uint worker_id) {
1711     assert(worker_id < _n_workers, "invariant");
1712 
1713     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1714                                             _actual_region_bm, _actual_card_bm,
1715                                             _expected_region_bm,
1716                                             _expected_card_bm,
1717                                             _verbose);
1718 
1719     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1720 
1721     Atomic::add(verify_cl.failures(), &_failures);
1722   }
1723 
1724   int failures() const { return _failures; }
1725 };
1726 
1727 // Closure that finalizes the liveness counting data.
1728 // Used during the cleanup pause.
1729 // Sets the bits corresponding to the interval [NTAMS, top)
1730 // (which contains the implicitly live objects) in the
1731 // card liveness bitmap. Also sets the bit, in the region
1732 // liveness bitmap, for each region containing live data.
1733 
1734 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1735  public:
1736   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1737                               BitMap* region_bm,
1738                               BitMap* card_bm) :
1739     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1740 
1741   bool doHeapRegion(HeapRegion* hr) {
1742 
1743     if (hr->is_continues_humongous()) {
1744       // We will ignore these here and process them when their
1745       // associated "starts humongous" region is processed (see
1746       // set_bit_for_region()). Note that we cannot rely on their
1747       // associated "starts humongous" region to have its bit set to
1748       // 1 since, due to the region chunking in the parallel region
1749       // iteration, a "continues humongous" region might be visited
1750       // before its associated "starts humongous" region.
1751       return false;
1752     }
1753 
1754     HeapWord* ntams = hr->next_top_at_mark_start();
1755     HeapWord* top   = hr->top();
1756 
1757     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1758 
1759     // Mark the allocated-since-marking portion...
1760     if (ntams < top) {
1761       // This definitely means the region has live objects.
1762       set_bit_for_region(hr);
1763 
1764       // Now set the bits in the card bitmap for [ntams, top)
1765       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1766       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1767 
1768       // Note: if we're looking at the last region in the heap, top
1769       // could actually be just beyond the end of the heap; end_idx
1770       // will then correspond to a (non-existent) card that is also
1771       // just beyond the heap.
1772       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1773         // top is not card aligned - increment to cover
1774         // all the cards spanned by [ntams, top)
1775         end_idx += 1;
1776       }
1777 
1778       assert(end_idx <= _card_bm->size(),
1779              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1780                      end_idx, _card_bm->size()));
1781       assert(start_idx < _card_bm->size(),
1782              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1783                      start_idx, _card_bm->size()));
1784 
1785       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1786     }
1787 
1788     // Set the bit for the region if it contains live data
1789     if (hr->next_marked_bytes() > 0) {
1790       set_bit_for_region(hr);
1791     }
1792 
1793     return false;
1794   }
1795 };
1796 
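     // Gang task that applies FinalCountDataUpdateClosure to all heap
     // regions in parallel during the cleanup pause.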
1797 class G1ParFinalCountTask: public AbstractGangTask {
1798 protected:
1799   G1CollectedHeap* _g1h;
1800   ConcurrentMark* _cm;
1801   BitMap* _actual_region_bm;
1802   BitMap* _actual_card_bm;
1803 
1804   uint    _n_workers;
1805   HeapRegionClaimer _hrclaimer;
1806 
1807 public:
1808   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1809     : AbstractGangTask("G1 final counting"),
1810       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1811       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1812       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1813   }
1814 
1815   void work(uint worker_id) {
1816     assert(worker_id < _n_workers, "invariant");
1817 
1818     FinalCountDataUpdateClosure final_update_cl(_g1h,
1819                                                 _actual_region_bm,
1820                                                 _actual_card_bm);
1821 
1822     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1823   }
1824 };
1825 
1826 class G1ParNoteEndTask;
1827 
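     // Per-region closure applied at the end of marking. It records
     // end-of-marking information for each region and frees non-young
     // regions whose live data is zero, collecting them on a
     // worker-local cleanup list.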
1828 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1829   G1CollectedHeap* _g1;
1830   size_t _max_live_bytes;
1831   uint _regions_claimed;
1832   size_t _freed_bytes;
1833   FreeRegionList* _local_cleanup_list;
1834   HeapRegionSetCount _old_regions_removed;
1835   HeapRegionSetCount _humongous_regions_removed;
1836   HRRSCleanupTask* _hrrs_cleanup_task;
1837   double _claimed_region_time;
1838   double _max_region_time;
1839 
1840 public:
1841   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1842                              FreeRegionList* local_cleanup_list,
1843                              HRRSCleanupTask* hrrs_cleanup_task) :
1844     _g1(g1),
1845     _max_live_bytes(0), _regions_claimed(0),
1846     _freed_bytes(0),
1847     _claimed_region_time(0.0), _max_region_time(0.0),
1848     _local_cleanup_list(local_cleanup_list),
1849     _old_regions_removed(),
1850     _humongous_regions_removed(),
1851     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1852 
1853   size_t freed_bytes() { return _freed_bytes; }
1854   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1855   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1856 
1857   bool doHeapRegion(HeapRegion *hr) {
1858     if (hr->is_continues_humongous()) {
1859       return false;
1860     }
1861     // Regions are claimed via the HeapRegionClaimer during the parallel
1862     // iteration, so each region is processed by exactly one worker.
1863     _g1->reset_gc_time_stamps(hr);
1864     double start = os::elapsedTime();
1865     _regions_claimed++;
1866     hr->note_end_of_marking();
1867     _max_live_bytes += hr->max_live_bytes();
1868 
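         // The region has been allocated into but contains no live data
         // and is not young: reclaim it now via the local cleanup list.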
1869     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1870       _freed_bytes += hr->used();
1871       hr->set_containing_set(NULL);
1872       if (hr->is_humongous()) {
1873         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1874         _humongous_regions_removed.increment(1u, hr->capacity());
1875         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1876       } else {
1877         _old_regions_removed.increment(1u, hr->capacity());
1878         _g1->free_region(hr, _local_cleanup_list, true);
1879       }
1880     } else {
1881       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1882     }
1883 
1884     double region_time = (os::elapsedTime() - start);
1885     _claimed_region_time += region_time;
1886     if (region_time > _max_region_time) {
1887       _max_region_time = region_time;
1888     }
1889     return false;
1890   }
1891 
1892   size_t max_live_bytes() { return _max_live_bytes; }
1893   uint regions_claimed() { return _regions_claimed; }
1894   double claimed_region_time_sec() { return _claimed_region_time; }
1895   double max_region_time_sec() { return _max_region_time; }
1896 };
1897 
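     // Gang task that applies G1NoteEndOfConcMarkClosure to all heap
     // regions in parallel and then merges the worker-local cleanup lists
     // and statistics into the global state.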
1898 class G1ParNoteEndTask: public AbstractGangTask {
1899   friend class G1NoteEndOfConcMarkClosure;
1900 
1901 protected:
1902   G1CollectedHeap* _g1h;
1903   size_t _max_live_bytes;
1904   size_t _freed_bytes;
1905   FreeRegionList* _cleanup_list;
1906   HeapRegionClaimer _hrclaimer;
1907 
1908 public:
1909   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1910       AbstractGangTask("G1 note end"), _g1h(g1h), _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1911   }
1912 
1913   void work(uint worker_id) {
1914     FreeRegionList local_cleanup_list("Local Cleanup List");
1915     HRRSCleanupTask hrrs_cleanup_task;
1916     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1917                                            &hrrs_cleanup_task);
1918     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1919     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1920 
1921     // Now update the lists
1922     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1923     {
1924       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1925       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1926       _max_live_bytes += g1_note_end.max_live_bytes();
1927       _freed_bytes += g1_note_end.freed_bytes();
1928 
1929       // If we iterate over the global cleanup list at the end of
1930       // cleanup to do this printing we will not guarantee to only
1931       // generate output for the newly-reclaimed regions (the list
1932       // might not be empty at the beginning of cleanup; we might
1933       // still be working on its previous contents). So we do the
1934       // printing here, before we append the new regions to the global
1935       // cleanup list.
1936 
1937       G1HRPrinter* hr_printer = _g1h->hr_printer();
1938       if (hr_printer->is_active()) {
1939         FreeRegionListIterator iter(&local_cleanup_list);
1940         while (iter.more_available()) {
1941           HeapRegion* hr = iter.get_next();
1942           hr_printer->cleanup(hr);
1943         }
1944       }
1945 
1946       _cleanup_list->add_ordered(&local_cleanup_list);
1947       assert(local_cleanup_list.is_empty(), "post-condition");
1948 
1949       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1950     }
1951   }
1952   size_t max_live_bytes() { return _max_live_bytes; }
1953   size_t freed_bytes() { return _freed_bytes; }
1954 };
1955 
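     // Gang task that scrubs the remembered sets in parallel using the
     // region and card liveness bitmaps (see G1RemSet::scrub()).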
1956 class G1ParScrubRemSetTask: public AbstractGangTask {
1957 protected:
1958   G1RemSet* _g1rs;
1959   BitMap* _region_bm;
1960   BitMap* _card_bm;
1961   HeapRegionClaimer _hrclaimer;
1962 
1963 public:
1964   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1965       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1966   }
1967 
1968   void work(uint worker_id) {
1969     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1970   }
1971 
1972 };
1973 
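     // The STW cleanup pause: finalize the liveness counting data, note
     // the end of marking in every region, free regions that contain no
     // live data and, if G1ScrubRemSets is set, scrub the remembered sets.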
1974 void ConcurrentMark::cleanup() {
1975   // world is stopped at this checkpoint
1976   assert(SafepointSynchronize::is_at_safepoint(),
1977          "world should be stopped");
1978   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1979 
1980   // If a full collection has happened, we shouldn't do this.
1981   if (has_aborted()) {
1982     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1983     return;
1984   }
1985 
1986   g1h->verify_region_sets_optional();
1987 
1988   if (VerifyDuringGC) {
1989     HandleMark hm;  // handle scope
1990     Universe::heap()->prepare_for_verify();
1991     Universe::verify(VerifyOption_G1UsePrevMarking,
1992                      " VerifyDuringGC:(before)");
1993   }
1994   g1h->check_bitmaps("Cleanup Start");
1995 
1996   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1997   g1p->record_concurrent_mark_cleanup_start();
1998 
1999   double start = os::elapsedTime();
2000 
2001   HeapRegionRemSet::reset_for_cleanup_tasks();
2002 
2003   uint n_workers;
2004 
2005   // Do counting once more with the world stopped for good measure.
2006   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2007 
2008   g1h->set_par_threads();
2009   n_workers = g1h->n_par_threads();
2010   assert(g1h->n_par_threads() == n_workers,
2011          "Should not have been reset");
2012   g1h->workers()->run_task(&g1_par_count_task);
2013   // Done with the parallel phase so reset to 0.
2014   g1h->set_par_threads(0);
2015 
2016   if (VerifyDuringGC) {
2017     // Verify that the counting data accumulated during marking matches
2018     // that calculated by walking the marking bitmap.
2019 
2020     // Bitmaps to hold expected values
2021     BitMap expected_region_bm(_region_bm.size(), true);
2022     BitMap expected_card_bm(_card_bm.size(), true);
2023 
2024     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2025                                                  &_region_bm,
2026                                                  &_card_bm,
2027                                                  &expected_region_bm,
2028                                                  &expected_card_bm);
2029 
2030     g1h->set_par_threads((int)n_workers);
2031     g1h->workers()->run_task(&g1_par_verify_task);
2032     // Done with the parallel phase so reset to 0.
2033     g1h->set_par_threads(0);
2034 
2035     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2036   }
2037 
2038   size_t start_used_bytes = g1h->used();
2039   g1h->set_marking_complete();
2040 
2041   double count_end = os::elapsedTime();
2042   double this_final_counting_time = (count_end - start);
2043   _total_counting_time += this_final_counting_time;
2044 
2045   if (G1PrintRegionLivenessInfo) {
2046     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2047     _g1h->heap_region_iterate(&cl);
2048   }
2049 
2050   // Install the just-completed "next" mark bitmap as the "prev" bitmap.
2051   swapMarkBitMaps();
2052 
2053   g1h->reset_gc_time_stamp();
2054 
2055   // Note end of marking in all heap regions.
2056   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
2057   g1h->set_par_threads((int)n_workers);
2058   g1h->workers()->run_task(&g1_par_note_end_task);
2059   g1h->set_par_threads(0);
2060   g1h->check_gc_time_stamps();
2061 
2062   if (!cleanup_list_is_empty()) {
2063     // The cleanup list is not empty, so we'll have to process it
2064     // concurrently. Notify anyone else that might be wanting free
2065     // regions that there will be more free regions coming soon.
2066     g1h->set_free_regions_coming();
2067   }
2068 
2069   // Scrub the rem sets before the record_concurrent_mark_cleanup_end()
2070   // call below, since it affects the metric by which we sort the heap regions.
2071   if (G1ScrubRemSets) {
2072     double rs_scrub_start = os::elapsedTime();
2073     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
2074     g1h->set_par_threads((int)n_workers);
2075     g1h->workers()->run_task(&g1_par_scrub_rs_task);
2076     g1h->set_par_threads(0);
2077 
2078     double rs_scrub_end = os::elapsedTime();
2079     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2080     _total_rs_scrub_time += this_rs_scrub_time;
2081   }
2082 
2083   // this will also free any regions totally full of garbage objects,
2084   // and sort the regions.
2085   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2086 
2087   // Statistics.
2088   double end = os::elapsedTime();
2089   _cleanup_times.add((end - start) * 1000.0);
2090 
2091   if (G1Log::fine()) {
2092     g1h->g1_policy()->print_heap_transition(start_used_bytes);
2093   }
2094 
2095   // Clean up will have freed any regions completely full of garbage.
2096   // Update the soft reference policy with the new heap occupancy.
2097   Universe::update_heap_info_at_gc();
2098 
2099   if (VerifyDuringGC) {
2100     HandleMark hm;  // handle scope
2101     Universe::heap()->prepare_for_verify();
2102     Universe::verify(VerifyOption_G1UsePrevMarking,
2103                      " VerifyDuringGC:(after)");
2104   }
2105 
2106   g1h->check_bitmaps("Cleanup End");
2107 
2108   g1h->verify_region_sets_optional();
2109 
2110   // We need to make this be a "collection" so any collection pause that
2111   // races with it goes around and waits for completeCleanup to finish.
2112   g1h->increment_total_collections();
2113 
2114   // Clean out dead classes and update Metaspace sizes.
2115   if (ClassUnloadingWithConcurrentMark) {
2116     ClassLoaderDataGraph::purge();
2117   }
2118   MetaspaceGC::compute_new_size();
2119 
2120   // We reclaimed old regions so we should calculate the sizes to make
2121   // sure we update the old gen/space data.
2122   g1h->g1mm()->update_sizes();
2123   g1h->allocation_context_stats().update_after_mark();
2124 
2125   g1h->trace_heap_after_concurrent_cycle();
2126 }
2127 
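     // Runs concurrently after the cleanup pause: clears and frees the
     // regions collected on the _cleanup_list, moving them to the
     // secondary free list in batches.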
2128 void ConcurrentMark::completeCleanup() {
2129   if (has_aborted()) return;
2130 
2131   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2132 
2133   _cleanup_list.verify_optional();
2134   FreeRegionList tmp_free_list("Tmp Free List");
2135 
2136   if (G1ConcRegionFreeingVerbose) {
2137     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2138                            "cleanup list has %u entries",
2139                            _cleanup_list.length());
2140   }
2141 
2142   // No one else should be accessing the _cleanup_list at this point,
2143   // so it is not necessary to take any locks
2144   while (!_cleanup_list.is_empty()) {
2145     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2146     assert(hr != NULL, "Got NULL from a non-empty list");
2147     hr->par_clear();
2148     tmp_free_list.add_ordered(hr);
2149 
2150     // Instead of adding one region at a time to the secondary_free_list,
2151     // we accumulate them in the local list and move them a few at a
2152     // time. This also cuts down on the number of notify_all() calls
2153     // we do during this process. We'll also append the local list when
2154     // _cleanup_list is empty (which means we just removed the last
2155     // region from the _cleanup_list).
2156     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2157         _cleanup_list.is_empty()) {
2158       if (G1ConcRegionFreeingVerbose) {
2159         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2160                                "appending %u entries to the secondary_free_list, "
2161                                "cleanup list still has %u entries",
2162                                tmp_free_list.length(),
2163                                _cleanup_list.length());
2164       }
2165 
2166       {
2167         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2168         g1h->secondary_free_list_add(&tmp_free_list);
2169         SecondaryFreeList_lock->notify_all();
2170       }
2171 #ifndef PRODUCT
2172       if (G1StressConcRegionFreeing) {
2173         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2174           os::sleep(Thread::current(), (jlong) 1, false);
2175         }
2176       }
2177 #endif
2178     }
2179   }
2180   assert(tmp_free_list.is_empty(), "post-condition");
2181 }
2182 
2183 // Supporting Object and Oop closures for reference discovery
2184 // and processing during marking
2185 
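     // An object is considered live if its address lies outside the G1
     // reserved heap, or if G1 does not consider the object dead
     // (!is_obj_ill()).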
2186 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2187   HeapWord* addr = (HeapWord*)obj;
2188   return addr != NULL &&
2189          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2190 }
2191 
2192 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2193 // Uses the CMTask associated with a worker thread (for serial reference
2194 // processing the CMTask for worker 0 is used) to preserve (mark) and
2195 // trace referent objects.
2196 //
2197 // Using the CMTask and embedded local queues avoids having the worker
2198 // threads operating on the global mark stack. This reduces the risk
2199 // of overflowing the stack - which we would rather avoid at this late
2200 // stage. Also using the tasks' local queues removes the potential
2201 // for the workers to interfere with each other, which could occur if
2202 // they were operating on the global stack.
2203 
2204 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2205   ConcurrentMark* _cm;
2206   CMTask*         _task;
2207   int             _ref_counter_limit;
2208   int             _ref_counter;
2209   bool            _is_serial;
2210  public:
2211   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2212     _cm(cm), _task(task), _is_serial(is_serial),
2213     _ref_counter_limit(G1RefProcDrainInterval) {
2214     assert(_ref_counter_limit > 0, "sanity");
2215     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2216     _ref_counter = _ref_counter_limit;
2217   }
2218 
2219   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2220   virtual void do_oop(      oop* p) { do_oop_work(p); }
2221 
2222   template <class T> void do_oop_work(T* p) {
2223     if (!_cm->has_overflown()) {
2224       oop obj = oopDesc::load_decode_heap_oop(p);
2225       if (_cm->verbose_high()) {
2226         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2227                                "*"PTR_FORMAT" = "PTR_FORMAT,
2228                                _task->worker_id(), p2i(p), p2i((void*) obj));
2229       }
2230 
2231       _task->deal_with_reference(obj);
2232       _ref_counter--;
2233 
2234       if (_ref_counter == 0) {
2235         // We have dealt with _ref_counter_limit references, pushing them
2236         // and objects reachable from them on to the local stack (and
2237         // possibly the global stack). Call CMTask::do_marking_step() to
2238         // process these entries.
2239         //
2240         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2241         // there's nothing more to do (i.e. we're done with the entries that
2242         // were pushed as a result of the CMTask::deal_with_reference() calls
2243         // above) or we overflow.
2244         //
2245         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2246         // flag while there may still be some work to do. (See the comment at
2247         // the beginning of CMTask::do_marking_step() for those conditions -
2248         // one of which is reaching the specified time target.) It is only
2249         // when CMTask::do_marking_step() returns without setting the
2250         // has_aborted() flag that the marking step has completed.
2251         do {
2252           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2253           _task->do_marking_step(mark_step_duration_ms,
2254                                  false      /* do_termination */,
2255                                  _is_serial);
2256         } while (_task->has_aborted() && !_cm->has_overflown());
2257         _ref_counter = _ref_counter_limit;
2258       }
2259     } else {
2260       if (_cm->verbose_high()) {
2261          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2262       }
2263     }
2264   }
2265 };
2266 
2267 // 'Drain' closure used by both serial and parallel reference processing.
2268 // Uses the CMTask associated with a given worker thread (for serial
2269 // reference processing the CMTask for worker 0 is used). Calls the
2270 // do_marking_step routine, with an unbelievably large timeout value,
2271 // to drain the marking data structures of the remaining entries
2272 // added by the 'keep alive' oop closure above.
2273 
2274 class G1CMDrainMarkingStackClosure: public VoidClosure {
2275   ConcurrentMark* _cm;
2276   CMTask*         _task;
2277   bool            _is_serial;
2278  public:
2279   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2280     _cm(cm), _task(task), _is_serial(is_serial) {
2281     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2282   }
2283 
2284   void do_void() {
2285     do {
2286       if (_cm->verbose_high()) {
2287         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2288                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2289       }
2290 
2291       // We call CMTask::do_marking_step() to completely drain the local
2292       // and global marking stacks of entries pushed by the 'keep alive'
2293       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2294       //
2295       // CMTask::do_marking_step() is called in a loop, which we'll exit
2296       // if there's nothing more to do (i.e. we've completely drained the
2297       // entries that were pushed as a result of applying the 'keep alive'
2298       // closure to the entries on the discovered ref lists) or we overflow
2299       // the global marking stack.
2300       //
2301       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2302       // flag while there may still be some work to do. (See the comment at
2303       // the beginning of CMTask::do_marking_step() for those conditions -
2304       // one of which is reaching the specified time target.) It is only
2305       // when CMTask::do_marking_step() returns without setting the
2306       // has_aborted() flag that the marking step has completed.
2307 
2308       _task->do_marking_step(1000000000.0 /* something very large */,
2309                              true         /* do_termination */,
2310                              _is_serial);
2311     } while (_task->has_aborted() && !_cm->has_overflown());
2312   }
2313 };
2314 
2315 // Implementation of AbstractRefProcTaskExecutor for parallel
2316 // reference processing at the end of G1 concurrent marking
2317 
2318 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2319 private:
2320   G1CollectedHeap* _g1h;
2321   ConcurrentMark*  _cm;
2322   WorkGang*        _workers;
2323   int              _active_workers;
2324 
2325 public:
2326   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2327                         ConcurrentMark* cm,
2328                         WorkGang* workers,
2329                         int n_workers) :
2330     _g1h(g1h), _cm(cm),
2331     _workers(workers), _active_workers(n_workers) { }
2332 
2333   // Executes the given task using concurrent marking worker threads.
2334   virtual void execute(ProcessTask& task);
2335   virtual void execute(EnqueueTask& task);
2336 };
2337 
2338 class G1CMRefProcTaskProxy: public AbstractGangTask {
2339   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2340   ProcessTask&     _proc_task;
2341   G1CollectedHeap* _g1h;
2342   ConcurrentMark*  _cm;
2343 
2344 public:
2345   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2346                      G1CollectedHeap* g1h,
2347                      ConcurrentMark* cm) :
2348     AbstractGangTask("Process reference objects in parallel"),
2349     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2350     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2351     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2352   }
2353 
2354   virtual void work(uint worker_id) {
2355     ResourceMark rm;
2356     HandleMark hm;
2357     CMTask* task = _cm->task(worker_id);
2358     G1CMIsAliveClosure g1_is_alive(_g1h);
2359     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2360     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2361 
2362     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2363   }
2364 };
2365 
2366 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2367   assert(_workers != NULL, "Need parallel worker threads.");
2368   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2369 
2370   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2371 
2372   // We need to reset the concurrency level before each
2373   // proxy task execution, so that the termination protocol
2374   // and overflow handling in CMTask::do_marking_step() know
2375   // how many workers to wait for.
2376   _cm->set_concurrency(_active_workers);
2377   _g1h->set_par_threads(_active_workers);
2378   _workers->run_task(&proc_task_proxy);
2379   _g1h->set_par_threads(0);
2380 }
2381 
2382 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2383   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2384   EnqueueTask& _enq_task;
2385 
2386 public:
2387   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2388     AbstractGangTask("Enqueue reference objects in parallel"),
2389     _enq_task(enq_task) { }
2390 
2391   virtual void work(uint worker_id) {
2392     _enq_task.work(worker_id);
2393   }
2394 };
2395 
2396 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2397   assert(_workers != NULL, "Need parallel worker threads.");
2398   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2399 
2400   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2401 
2402   // Not strictly necessary but...
2403   //
2404   // We need to reset the concurrency level before each
2405   // proxy task execution, so that the termination protocol
2406   // and overflow handling in CMTask::do_marking_step() know
2407   // how many workers to wait for.
2408   _cm->set_concurrency(_active_workers);
2409   _g1h->set_par_threads(_active_workers);
2410   _workers->run_task(&enq_task_proxy);
2411   _g1h->set_par_threads(0);
2412 }
2413 
2414 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2415   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2416 }
2417 
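     // Reference processing (and, when enabled, class/code unloading)
     // performed for the concurrent marking cycle.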
2418 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2419   if (has_overflown()) {
2420     // Skip processing the discovered references if we have
2421     // overflown the global marking stack. Reference objects
2422     // only get discovered once so it is OK to not
2423     // de-populate the discovered reference lists. We could have,
2424     // but the only benefit would be that, when marking restarts,
2425     // less reference objects are discovered.
2426     return;
2427   }
2428 
2429   ResourceMark rm;
2430   HandleMark   hm;
2431 
2432   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2433 
2434   // Is alive closure.
2435   G1CMIsAliveClosure g1_is_alive(g1h);
2436 
2437   // Inner scope to exclude the cleaning of the string and symbol
2438   // tables from the displayed time.
2439   {
2440     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2441 
2442     ReferenceProcessor* rp = g1h->ref_processor_cm();
2443 
2444     // See the comment in G1CollectedHeap::ref_processing_init()
2445     // about how reference processing currently works in G1.
2446 
2447     // Set the soft reference policy
2448     rp->setup_policy(clear_all_soft_refs);
2449     assert(_markStack.isEmpty(), "mark stack should be empty");
2450 
2451     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2452     // in serial reference processing. Note these closures are also
2453     // used for serially processing (by the current thread) the
2454     // JNI references during parallel reference processing.
2455     //
2456     // These closures do not need to synchronize with the worker
2457     // threads involved in parallel reference processing as these
2458     // instances are executed serially by the current thread (e.g.
2459     // reference processing is not multi-threaded and is thus
2460     // performed by the current thread instead of a gang worker).
2461     //
2462     // The gang tasks involved in parallel reference processing create
2463     // their own instances of these closures, which do their own
2464     // synchronization among themselves.
2465     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2466     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2467 
2468     // We need at least one active thread. If reference processing
2469     // is not multi-threaded we use the current (VMThread) thread,
2470     // otherwise we use the work gang from the G1CollectedHeap and
2471     // we utilize all the worker threads we can.
2472     bool processing_is_mt = rp->processing_is_mt();
2473     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2474     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2475 
2476     // Parallel processing task executor.
2477     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2478                                               g1h->workers(), active_workers);
2479     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2480 
2481     // Set the concurrency level. The phase was already set prior to
2482     // executing the remark task.
2483     set_concurrency(active_workers);
2484 
2485     // Set the degree of MT processing here.  If the discovery was done MT,
2486     // the number of threads involved during discovery could differ from
2487     // the number of active workers.  This is OK as long as the discovered
2488     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2489     rp->set_active_mt_degree(active_workers);
2490 
2491     // Process the weak references.
2492     const ReferenceProcessorStats& stats =
2493         rp->process_discovered_references(&g1_is_alive,
2494                                           &g1_keep_alive,
2495                                           &g1_drain_mark_stack,
2496                                           executor,
2497                                           g1h->gc_timer_cm(),
2498                                           concurrent_gc_id());
2499     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2500 
2501     // The do_oop work routines of the keep_alive and drain_marking_stack
2502     // oop closures will set the has_overflown flag if we overflow the
2503     // global marking stack.
2504 
2505     assert(_markStack.overflow() || _markStack.isEmpty(),
2506             "mark stack should be empty (unless it overflowed)");
2507 
2508     if (_markStack.overflow()) {
2509       // This should have been done already when we tried to push an
2510       // entry on to the global mark stack. But let's do it again.
2511       set_has_overflown();
2512     }
2513 
2514     assert(rp->num_q() == active_workers, "why not");
2515 
2516     rp->enqueue_discovered_references(executor);
2517 
2518     rp->verify_no_references_recorded();
2519     assert(!rp->discovery_enabled(), "Post condition");
2520   }
2521 
2522   if (has_overflown()) {
2523     // We can not trust g1_is_alive if the marking stack overflowed
2524     return;
2525   }
2526 
2527   assert(_markStack.isEmpty(), "Marking should have completed");
2528 
2529   // Unload Klasses, String, Symbols, Code Cache, etc.
2530   {
2531     G1CMTraceTime trace("Unloading", G1Log::finer());
2532 
2533     if (ClassUnloadingWithConcurrentMark) {
2534       bool purged_classes;
2535 
2536       {
2537         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2538         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2539       }
2540 
2541       {
2542         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2543         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2544       }
2545     }
2546 
2547     if (G1StringDedup::is_enabled()) {
2548       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2549       G1StringDedup::unlink(&g1_is_alive);
2550     }
2551   }
2552 }
2553 
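     // Swap the roles of the two marking bitmaps: the "next" bitmap,
     // which now holds the up-to-date liveness information, becomes the
     // new "prev" bitmap.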
2554 void ConcurrentMark::swapMarkBitMaps() {
2555   CMBitMapRO* temp = _prevMarkBitMap;
2556   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2557   _nextMarkBitMap  = (CMBitMap*)  temp;
2558 }
2559 
2560 class CMObjectClosure;
2561 
2562 // Closure for iterating over objects, currently only used for
2563 // processing SATB buffers.
2564 class CMObjectClosure : public ObjectClosure {
2565 private:
2566   CMTask* _task;
2567 
2568 public:
2569   void do_object(oop obj) {
2570     _task->deal_with_reference(obj);
2571   }
2572 
2573   CMObjectClosure(CMTask* task) : _task(task) { }
2574 };
2575 
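     // Thread closure used during remark: for each Java thread it marks
     // the oops in the thread's nmethods and drains the thread's SATB
     // buffer; for the VM thread it drains the shared SATB queue.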
2576 class G1RemarkThreadsClosure : public ThreadClosure {
2577   CMObjectClosure _cm_obj;
2578   G1CMOopClosure _cm_cl;
2579   MarkingCodeBlobClosure _code_cl;
2580   int _thread_parity;
2581 
2582  public:
2583   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2584     _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2585     _thread_parity(Threads::thread_claim_parity()) {}
2586 
2587   void do_thread(Thread* thread) {
2588     if (thread->is_Java_thread()) {
2589       if (thread->claim_oops_do(true, _thread_parity)) {
2590         JavaThread* jt = (JavaThread*)thread;
2591 
2592         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2593         // however, oops reachable from nmethods have very complex lifecycles:
2594         // * Alive if on the stack of an executing method
2595         // * Weakly reachable otherwise
2596         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2597         // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2598         jt->nmethods_do(&_code_cl);
2599 
2600         jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
2601       }
2602     } else if (thread->is_VM_thread()) {
2603       if (thread->claim_oops_do(true, _thread_parity)) {
2604         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
2605       }
2606     }
2607   }
2608 };
2609 
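     // Gang task that performs the final (remark) marking step: each
     // worker scans the thread roots via G1RemarkThreadsClosure and then
     // calls do_marking_step() until its CM task has completed.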
2610 class CMRemarkTask: public AbstractGangTask {
2611 private:
2612   ConcurrentMark* _cm;
2613 public:
2614   void work(uint worker_id) {
2615     // Since all available tasks are actually started, we should
2616     // only proceed if we're supposed to be active.
2617     if (worker_id < _cm->active_tasks()) {
2618       CMTask* task = _cm->task(worker_id);
2619       task->record_start_time();
2620       {
2621         ResourceMark rm;
2622         HandleMark hm;
2623 
2624         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2625         Threads::threads_do(&threads_f);
2626       }
2627 
2628       do {
2629         task->do_marking_step(1000000000.0 /* something very large */,
2630                               true         /* do_termination       */,
2631                               false        /* is_serial            */);
2632       } while (task->has_aborted() && !_cm->has_overflown());
2633       // If we overflow, then we do not want to restart. We instead
2634       // want to abort remark and do concurrent marking again.
2635       task->record_end_time();
2636     }
2637   }
2638 
2639   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2640     AbstractGangTask("Par Remark"), _cm(cm) {
2641     _cm->terminator()->reset_for_reuse(active_workers);
2642   }
2643 };
2644 
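     // The parallel work done during the remark pause: run CMRemarkTask
     // with all active workers and then check that the SATB buffers have
     // been drained (unless marking overflowed).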
2645 void ConcurrentMark::checkpointRootsFinalWork() {
2646   ResourceMark rm;
2647   HandleMark   hm;
2648   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2649 
2650   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2651 
2652   g1h->ensure_parsability(false);
2653 
2654   StrongRootsScope srs;
2655   // this is remark, so we'll use up all active threads
2656   uint active_workers = g1h->workers()->active_workers();
2657   if (active_workers == 0) {
2658     assert(active_workers > 0, "Should have been set earlier");
2659     active_workers = (uint) ParallelGCThreads;
2660     g1h->workers()->set_active_workers(active_workers);
2661   }
2662   set_concurrency_and_phase(active_workers, false /* concurrent */);
2663   // Leave _parallel_marking_threads at its
2664   // value originally calculated in the ConcurrentMark
2665   // constructor and pass values of the active workers
2666   // through the gang in the task.
2667 
2668   CMRemarkTask remarkTask(this, active_workers);
2669   // We will start all available threads, even if we decide that the
2670   // active_workers will be fewer. The extra ones will just bail out
2671   // immediately.
2672   g1h->set_par_threads(active_workers);
2673   g1h->workers()->run_task(&remarkTask);
2674   g1h->set_par_threads(0);
2675 
2676   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2677   guarantee(has_overflown() ||
2678             satb_mq_set.completed_buffers_num() == 0,
2679             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2680                     BOOL_TO_STR(has_overflown()),
2681                     satb_mq_set.completed_buffers_num()));
2682 
2683   print_stats();
2684 }
2685 
2686 #ifndef PRODUCT
2687 
2688 class PrintReachableOopClosure: public OopClosure {
2689 private:
2690   G1CollectedHeap* _g1h;
2691   outputStream*    _out;
2692   VerifyOption     _vo;
2693   bool             _all;
2694 
2695 public:
2696   PrintReachableOopClosure(outputStream* out,
2697                            VerifyOption  vo,
2698                            bool          all) :
2699     _g1h(G1CollectedHeap::heap()),
2700     _out(out), _vo(vo), _all(all) { }
2701 
2702   void do_oop(narrowOop* p) { do_oop_work(p); }
2703   void do_oop(      oop* p) { do_oop_work(p); }
2704 
2705   template <class T> void do_oop_work(T* p) {
2706     oop         obj = oopDesc::load_decode_heap_oop(p);
2707     const char* str = NULL;
2708     const char* str2 = "";
2709 
2710     if (obj == NULL) {
2711       str = "";
2712     } else if (!_g1h->is_in_g1_reserved(obj)) {
2713       str = " O";
2714     } else {
2715       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2716       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2717       bool marked = _g1h->is_marked(obj, _vo);
2718 
2719       if (over_tams) {
2720         str = " >";
2721         if (marked) {
2722           str2 = " AND MARKED";
2723         }
2724       } else if (marked) {
2725         str = " M";
2726       } else {
2727         str = " NOT";
2728       }
2729     }
2730 
2731     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2732                    p2i(p), p2i((void*) obj), str, str2);
2733   }
2734 };
2735 
2736 class PrintReachableObjectClosure : public ObjectClosure {
2737 private:
2738   G1CollectedHeap* _g1h;
2739   outputStream*    _out;
2740   VerifyOption     _vo;
2741   bool             _all;
2742   HeapRegion*      _hr;
2743 
2744 public:
2745   PrintReachableObjectClosure(outputStream* out,
2746                               VerifyOption  vo,
2747                               bool          all,
2748                               HeapRegion*   hr) :
2749     _g1h(G1CollectedHeap::heap()),
2750     _out(out), _vo(vo), _all(all), _hr(hr) { }
2751 
2752   void do_object(oop o) {
2753     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2754     bool marked = _g1h->is_marked(o, _vo);
2755     bool print_it = _all || over_tams || marked;
2756 
2757     if (print_it) {
2758       _out->print_cr(" "PTR_FORMAT"%s",
2759                      p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2760       PrintReachableOopClosure oopCl(_out, _vo, _all);
2761       o->oop_iterate_no_header(&oopCl);
2762     }
2763   }
2764 };
2765 
2766 class PrintReachableRegionClosure : public HeapRegionClosure {
2767 private:
2768   G1CollectedHeap* _g1h;
2769   outputStream*    _out;
2770   VerifyOption     _vo;
2771   bool             _all;
2772 
2773 public:
2774   bool doHeapRegion(HeapRegion* hr) {
2775     HeapWord* b = hr->bottom();
2776     HeapWord* e = hr->end();
2777     HeapWord* t = hr->top();
2778     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2779     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2780                    "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2781     _out->cr();
2782 
2783     HeapWord* from = b;
2784     HeapWord* to   = t;
2785 
2786     if (to > from) {
2787       _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2788       _out->cr();
2789       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2790       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2791       _out->cr();
2792     }
2793 
2794     return false;
2795   }
2796 
2797   PrintReachableRegionClosure(outputStream* out,
2798                               VerifyOption  vo,
2799                               bool          all) :
2800     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2801 };
2802 
2803 void ConcurrentMark::print_reachable(const char* str,
2804                                      VerifyOption vo,
2805                                      bool all) {
2806   gclog_or_tty->cr();
2807   gclog_or_tty->print_cr("== Doing heap dump... ");
2808 
2809   if (G1PrintReachableBaseFile == NULL) {
2810     gclog_or_tty->print_cr("  #### error: no base file defined");
2811     return;
2812   }
2813 
2814   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2815       (JVM_MAXPATHLEN - 1)) {
2816     gclog_or_tty->print_cr("  #### error: file name too long");
2817     return;
2818   }
2819 
2820   char file_name[JVM_MAXPATHLEN];
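       // The length check above guarantees that the base name, the '.', the
       // suffix and the terminating NUL all fit within file_name.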
2821   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2822   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2823 
2824   fileStream fout(file_name);
2825   if (!fout.is_open()) {
2826     gclog_or_tty->print_cr("  #### error: could not open file");
2827     return;
2828   }
2829 
2830   outputStream* out = &fout;
2831   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2832   out->cr();
2833 
2834   out->print_cr("--- ITERATING OVER REGIONS");
2835   out->cr();
2836   PrintReachableRegionClosure rcl(out, vo, all);
2837   _g1h->heap_region_iterate(&rcl);
2838   out->cr();
2839 
2840   gclog_or_tty->print_cr("  done");
2841   gclog_or_tty->flush();
2842 }
2843 
2844 #endif // PRODUCT
2845 
2846 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2847   // Note we are overriding the read-only view of the prev map here, via
2848   // the cast.
2849   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2850 }
2851 
2852 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2853   _nextMarkBitMap->clearRange(mr);
2854 }
2855 
2856 HeapRegion*
2857 ConcurrentMark::claim_region(uint worker_id) {
2858   // "checkpoint" the finger
2859   HeapWord* finger = _finger;
2860 
2861   // _heap_end will not change underneath our feet; it only changes at
2862   // yield points.
2863   while (finger < _heap_end) {
2864     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2865 
2866     // Note on how this code handles humongous regions. In the
2867     // normal case the finger will reach the start of a "starts
2868     // humongous" (SH) region. Its end will either be the end of the
2869     // last "continues humongous" (CH) region in the sequence, or the
2870     // standard end of the SH region (if the SH is the only region in
2871     // the sequence). That way claim_region() will skip over the CH
2872     // regions. However, there is a subtle race between a CM thread
2873     // executing this method and a mutator thread doing a humongous
2874     // object allocation. The two are not mutually exclusive as the CM
2875     // thread does not need to hold the Heap_lock when it gets
2876     // here. So there is a chance that claim_region() will come across
2877     // a free region that's in the process of becoming a SH or a CH
2878     // region. In the former case, it will either
2879     //   a) Miss the update to the region's end, in which case it will
2880     //      visit every subsequent CH region, will find their bitmaps
2881     //      empty, and do nothing, or
2882     //   b) Observe the update of the region's end, in which case
2883     //      it will skip the subsequent CH regions.
2884     // If it comes across a region that suddenly becomes CH, the
2885     // scenario will be similar to b). So, the race between
2886     // claim_region() and a humongous object allocation might force us
2887     // to do a bit of unnecessary work (due to some unnecessary bitmap
2888     // iterations) but it should not introduce any correctness issues.
2889     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2890 
2891     // The heap_region_containing_raw() call above may return NULL because we always
2892     // scan and claim until the end of the heap. In this case, just jump to the next region.
2893     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2894 
2895     // Is the gap between reading the finger and doing the CAS too long?
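         // Try to claim [finger, end) by atomically advancing the global finger
         // to end; if the CAS returns the value we read, no other worker raced
         // us and the claim succeeded.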
2896     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2897     if (res == finger && curr_region != NULL) {
2898       // we succeeded
2899       HeapWord*   bottom        = curr_region->bottom();
2900       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2901 
2902       if (verbose_low()) {
2903         gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2904                                "["PTR_FORMAT", "PTR_FORMAT"), "
2905                                "limit = "PTR_FORMAT,
2906                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2907       }
2908 
2909       // Notice that _finger == end cannot be guaranteed here, since
2910       // someone else might have moved the finger even further.
2911       assert(_finger >= end, "the finger should have moved forward");
2912 
2913       if (verbose_low()) {
2914         gclog_or_tty->print_cr("[%u] we were successful with region = "
2915                                PTR_FORMAT, worker_id, p2i(curr_region));
2916       }
2917 
2918       if (limit > bottom) {
2919         if (verbose_low()) {
2920           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2921                                  "returning it ", worker_id, p2i(curr_region));
2922         }
2923         return curr_region;
2924       } else {
2925         assert(limit == bottom,
2926                "the region limit should be at bottom");
2927         if (verbose_low()) {
2928           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2929                                  "returning NULL", worker_id, p2i(curr_region));
2930         }
2931         // we return NULL and the caller should try calling
2932         // claim_region() again.
2933         return NULL;
2934       }
2935     } else {
2936       assert(_finger > finger, "the finger should have moved forward");
2937       if (verbose_low()) {
2938         if (curr_region == NULL) {
2939           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2940                                  "global finger = "PTR_FORMAT", "
2941                                  "our finger = "PTR_FORMAT,
2942                                  worker_id, p2i(_finger), p2i(finger));
2943         } else {
2944           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2945                                  "global finger = "PTR_FORMAT", "
2946                                  "our finger = "PTR_FORMAT,
2947                                  worker_id, p2i(_finger), p2i(finger));
2948         }
2949       }
2950 
2951       // read it again
2952       finger = _finger;
2953     }
2954   }
2955 
2956   return NULL;
2957 }
2958 
2959 #ifndef PRODUCT
2960 enum VerifyNoCSetOopsPhase {
2961   VerifyNoCSetOopsStack,
2962   VerifyNoCSetOopsQueues,
2963   VerifyNoCSetOopsSATBCompleted,
2964   VerifyNoCSetOopsSATBThread
2965 };
2966 
2967 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2968 private:
2969   G1CollectedHeap* _g1h;
2970   VerifyNoCSetOopsPhase _phase;
2971   int _info;
2972 
2973   const char* phase_str() {
2974     switch (_phase) {
2975     case VerifyNoCSetOopsStack:         return "Stack";
2976     case VerifyNoCSetOopsQueues:        return "Queue";
2977     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2978     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2979     default:                            ShouldNotReachHere();
2980     }
2981     return NULL;
2982   }
2983 
2984   void do_object_work(oop obj) {
2985     guarantee(!_g1h->obj_in_cs(obj),
2986               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2987                       p2i((void*) obj), phase_str(), _info));
2988   }
2989 
2990 public:
2991   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2992 
2993   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2994     _phase = phase;
2995     _info = info;
2996   }
2997 
2998   virtual void do_oop(oop* p) {
2999     oop obj = oopDesc::load_decode_heap_oop(p);
3000     do_object_work(obj);
3001   }
3002 
3003   virtual void do_oop(narrowOop* p) {
3004     // We should not come across narrow oops while scanning marking
3005     // stacks and SATB buffers.
3006     ShouldNotReachHere();
3007   }
3008 
3009   virtual void do_object(oop obj) {
3010     do_object_work(obj);
3011   }
3012 };
3013 
3014 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
3015                                          bool verify_enqueued_buffers,
3016                                          bool verify_thread_buffers,
3017                                          bool verify_fingers) {
3018   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
3019   if (!G1CollectedHeap::heap()->mark_in_progress()) {
3020     return;
3021   }
3022 
3023   VerifyNoCSetOopsClosure cl;
3024 
3025   if (verify_stacks) {
3026     // Verify entries on the global mark stack
3027     cl.set_phase(VerifyNoCSetOopsStack);
3028     _markStack.oops_do(&cl);
3029 
3030     // Verify entries on the task queues
3031     for (uint i = 0; i < _max_worker_id; i += 1) {
3032       cl.set_phase(VerifyNoCSetOopsQueues, i);
3033       CMTaskQueue* queue = _task_queues->queue(i);
3034       queue->oops_do(&cl);
3035     }
3036   }
3037 
3038   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
3039 
3040   // Verify entries on the enqueued SATB buffers
3041   if (verify_enqueued_buffers) {
3042     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3043     satb_qs.iterate_completed_buffers_read_only(&cl);
3044   }
3045 
3046   // Verify entries on the per-thread SATB buffers
3047   if (verify_thread_buffers) {
3048     cl.set_phase(VerifyNoCSetOopsSATBThread);
3049     satb_qs.iterate_thread_buffers_read_only(&cl);
3050   }
3051 
3052   if (verify_fingers) {
3053     // Verify the global finger
3054     HeapWord* global_finger = finger();
3055     if (global_finger != NULL && global_finger < _heap_end) {
3056       // The global finger always points to a heap region boundary. We
3057       // use heap_region_containing_raw() to get the containing region
3058       // given that the global finger could be pointing to a free region
3059       // which subsequently becomes continues humongous. If that
3060       // happens, heap_region_containing() would return the corresponding
3061       // starts humongous region, whose bottom differs from the finger, and
3062       // the check below would not hold any more.
3063       // Since we always iterate over all regions, we might get a NULL HeapRegion
3064       // here.
3065       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3066       guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
3067                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3068                         p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3069     }
3070 
3071     // Verify the task fingers
3072     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3073     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3074       CMTask* task = _tasks[i];
3075       HeapWord* task_finger = task->finger();
3076       if (task_finger != NULL && task_finger < _heap_end) {
3077         // See above note on the global finger verification.
3078         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3079         guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
3080                   !task_hr->in_collection_set(),
3081                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3082                           p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3083       }
3084     }
3085   }
3086 }
3087 #endif // PRODUCT
3088 
3089 // Aggregate the counting data that was constructed concurrently
3090 // with marking.
3091 class AggregateCountDataHRClosure: public HeapRegionClosure {
3092   G1CollectedHeap* _g1h;
3093   ConcurrentMark* _cm;
3094   CardTableModRefBS* _ct_bs;
3095   BitMap* _cm_card_bm;
3096   uint _max_worker_id;
3097 
3098  public:
3099   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3100                               BitMap* cm_card_bm,
3101                               uint max_worker_id) :
3102     _g1h(g1h), _cm(g1h->concurrent_mark()),
3103     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
3104     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3105 
3106   bool doHeapRegion(HeapRegion* hr) {
3107     if (hr->is_continues_humongous()) {
3108       // We will ignore these here and process them when their
3109       // associated "starts humongous" region is processed.
3110       // Note that we cannot rely on their associated
3111       // "starts humongous" region to have its bit set to 1
3112       // since, due to the region chunking in the parallel region
3113       // iteration, a "continues humongous" region might be visited
3114       // before its associated "starts humongous".
3115       return false;
3116     }
3117 
3118     HeapWord* start = hr->bottom();
3119     HeapWord* limit = hr->next_top_at_mark_start();
3120     HeapWord* end = hr->end();
3121 
3122     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3123            err_msg("Preconditions not met - "
3124                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3125                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
3126                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3127 
3128     assert(hr->next_marked_bytes() == 0, "Precondition");
3129 
3130     if (start == limit) {
3131       // NTAMS of this region has not been set so nothing to do.
3132       return false;
3133     }
3134 
3135     // 'start' should be in the heap.
3136     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3137     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3138     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3139 
3140     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3141     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3142     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3143 
3144     // If ntams is not card aligned then we bump the card bitmap index
3145     // for limit so that we get all the cards spanned by
3146     // the object ending at ntams.
3147     // Note: if this is the last region in the heap then ntams
3148     // could actually be just beyond the end of the heap;
3149     // limit_idx will then correspond to a (non-existent) card
3150     // that is also outside the heap.
3151     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3152       limit_idx += 1;
3153     }
3154 
3155     assert(limit_idx <= end_idx, "or else use atomics");
3156 
3157     // Aggregate the "stripe" in the count data associated with hr.
3158     uint hrm_index = hr->hrm_index();
3159     size_t marked_bytes = 0;
3160 
3161     for (uint i = 0; i < _max_worker_id; i += 1) {
3162       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3163       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3164 
3165       // Fetch the marked_bytes in this region for task i and
3166       // add it to the running total for this region.
3167       marked_bytes += marked_bytes_array[hrm_index];
3168 
3169       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3170       // into the global card bitmap.
3171       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3172 
3173       while (scan_idx < limit_idx) {
3174         assert(task_card_bm->at(scan_idx) == true, "should be");
3175         _cm_card_bm->set_bit(scan_idx);
3176         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3177 
3178         // BitMap::get_next_one_offset() can handle the case when
3179         // its left_offset parameter is greater than its right_offset
3180         // parameter. It does, however, have an early exit if
3181         // left_offset == right_offset. So let's limit the value
3182         // passed in for left offset here.
3183         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3184         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3185       }
3186     }
3187 
3188     // Update the marked bytes for this region.
3189     hr->add_to_marked_bytes(marked_bytes);
3190 
3191     // Next heap region
3192     return false;
3193   }
3194 };
3195 
3196 class G1AggregateCountDataTask: public AbstractGangTask {
3197 protected:
3198   G1CollectedHeap* _g1h;
3199   ConcurrentMark* _cm;
3200   BitMap* _cm_card_bm;
3201   uint _max_worker_id;
3202   int _active_workers;
3203   HeapRegionClaimer _hrclaimer;
3204 
3205 public:
3206   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3207                            ConcurrentMark* cm,
3208                            BitMap* cm_card_bm,
3209                            uint max_worker_id,
3210                            int n_workers) :
3211       AbstractGangTask("Count Aggregation"),
3212       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3213       _max_worker_id(max_worker_id),
3214       _active_workers(n_workers),
3215       _hrclaimer(_active_workers) {
3216   }
3217 
3218   void work(uint worker_id) {
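         // Each worker claims regions through the shared HeapRegionClaimer and
         // folds the per-worker counting data for those regions into the global
         // card bitmap and per-region marked bytes.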
3219     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3220 
3221     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
3222   }
3223 };
3224 
3225 
3226 void ConcurrentMark::aggregate_count_data() {
3227   int n_workers = _g1h->workers()->active_workers();
3228 
3229   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3230                                            _max_worker_id, n_workers);
3231 
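       // Bracket the task with set_par_threads() so the work gang runs it with
       // n_workers threads, then reset the thread count afterwards.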
3232   _g1h->set_par_threads(n_workers);
3233   _g1h->workers()->run_task(&g1_par_agg_task);
3234   _g1h->set_par_threads(0);
3235 }
3236 
3237 // Clear the per-worker arrays used to store the per-region counting data
3238 void ConcurrentMark::clear_all_count_data() {
3239   // Clear the global card bitmap - it will be filled during
3240   // liveness count aggregation (during remark) and the
3241   // final counting task.
3242   _card_bm.clear();
3243 
3244   // Clear the global region bitmap - it will be filled as part
3245   // of the final counting task.
3246   _region_bm.clear();
3247 
3248   uint max_regions = _g1h->max_regions();
3249   assert(_max_worker_id > 0, "uninitialized");
3250 
3251   for (uint i = 0; i < _max_worker_id; i += 1) {
3252     BitMap* task_card_bm = count_card_bitmap_for(i);
3253     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3254 
3255     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3256     assert(marked_bytes_array != NULL, "uninitialized");
3257 
3258     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3259     task_card_bm->clear();
3260   }
3261 }
3262 
3263 void ConcurrentMark::print_stats() {
3264   if (verbose_stats()) {
3265     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3266     for (size_t i = 0; i < _active_tasks; ++i) {
3267       _tasks[i]->print_stats();
3268       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3269     }
3270   }
3271 }
3272 
3273 // abandon current marking iteration due to a Full GC
3274 void ConcurrentMark::abort() {
3275   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3276   // concurrent bitmap clearing.
3277   _nextMarkBitMap->clearAll();
3278 
3279   // Note we cannot clear the previous marking bitmap here
3280   // since VerifyDuringGC verifies the objects marked during
3281   // a full GC against the previous bitmap.
3282 
3283   // Clear the liveness counting data
3284   clear_all_count_data();
3285   // Empty mark stack
3286   reset_marking_state();
3287   for (uint i = 0; i < _max_worker_id; ++i) {
3288     _tasks[i]->clear_region_fields();
3289   }
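       // Abort the two overflow barriers so that any workers currently waiting
       // on them return instead of blocking for peers that will never arrive.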
3290   _first_overflow_barrier_sync.abort();
3291   _second_overflow_barrier_sync.abort();
3292   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3293   if (!gc_id.is_undefined()) {
3294     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3295     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3296     _aborted_gc_id = gc_id;
3297   }
3298   _has_aborted = true;
3299 
3300   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3301   satb_mq_set.abandon_partial_marking();
3302   // This can be called either during or outside marking; we'll read
3303   // the expected_active value from the SATB queue set.
3304   satb_mq_set.set_active_all_threads(
3305                                  false, /* new active value */
3306                                  satb_mq_set.is_active() /* expected_active */);
3307 
3308   _g1h->trace_heap_after_concurrent_cycle();
3309   _g1h->register_concurrent_cycle_end();
3310 }
3311 
3312 const GCId& ConcurrentMark::concurrent_gc_id() {
3313   if (has_aborted()) {
3314     return _aborted_gc_id;
3315   }
3316   return _g1h->gc_tracer_cm()->gc_id();
3317 }
3318 
3319 static void print_ms_time_info(const char* prefix, const char* name,
3320                                NumberSeq& ns) {
3321   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3322                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3323   if (ns.num() > 0) {
3324     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3325                            prefix, ns.sd(), ns.maximum());
3326   }
3327 }
3328 
3329 void ConcurrentMark::print_summary_info() {
3330   gclog_or_tty->print_cr(" Concurrent marking:");
3331   print_ms_time_info("  ", "init marks", _init_times);
3332   print_ms_time_info("  ", "remarks", _remark_times);
3333   {
3334     print_ms_time_info("     ", "final marks", _remark_mark_times);
3335     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3336 
3337   }
3338   print_ms_time_info("  ", "cleanups", _cleanup_times);
3339   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3340                          _total_counting_time,
3341                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3342                           (double)_cleanup_times.num()
3343                          : 0.0));
3344   if (G1ScrubRemSets) {
3345     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3346                            _total_rs_scrub_time,
3347                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3348                             (double)_cleanup_times.num()
3349                            : 0.0));
3350   }
3351   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3352                          (_init_times.sum() + _remark_times.sum() +
3353                           _cleanup_times.sum())/1000.0);
3354   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3355                 "(%8.2f s marking).",
3356                 cmThread()->vtime_accum(),
3357                 cmThread()->vtime_mark_accum());
3358 }
3359 
3360 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3361   _parallel_workers->print_worker_threads_on(st);
3362 }
3363 
3364 void ConcurrentMark::print_on_error(outputStream* st) const {
3365   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3366       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3367   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3368   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3369 }
3370 
3371 // We take a break if someone is trying to stop the world.
3372 bool ConcurrentMark::do_yield_check(uint worker_id) {
3373   if (SuspendibleThreadSet::should_yield()) {
3374     if (worker_id == 0) {
3375       _g1h->g1_policy()->record_concurrent_pause();
3376     }
3377     SuspendibleThreadSet::yield();
3378     return true;
3379   } else {
3380     return false;
3381   }
3382 }
3383 
3384 #ifndef PRODUCT
3385 // for debugging purposes
3386 void ConcurrentMark::print_finger() {
3387   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3388                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3389   for (uint i = 0; i < _max_worker_id; ++i) {
3390     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3391   }
3392   gclog_or_tty->cr();
3393 }
3394 #endif
3395 
3396 void CMTask::scan_object(oop obj) {
3397   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3398 
3399   if (_cm->verbose_high()) {
3400     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3401                            _worker_id, p2i((void*) obj));
3402   }
3403 
3404   size_t obj_size = obj->size();
3405   _words_scanned += obj_size;
3406 
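       // Visit all references in the object; the CM oop closure marks the
       // objects it discovers and pushes them for later scanning if needed.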
3407   obj->oop_iterate(_cm_oop_closure);
3408   statsOnly( ++_objs_scanned );
3409   check_limits();
3410 }
3411 
3412 // Closure for iteration over bitmaps
3413 class CMBitMapClosure : public BitMapClosure {
3414 private:
3415   // the bitmap that is being iterated over
3416   CMBitMap*                   _nextMarkBitMap;
3417   ConcurrentMark*             _cm;
3418   CMTask*                     _task;
3419 
3420 public:
3421   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3422     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3423 
3424   bool do_bit(size_t offset) {
3425     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3426     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3427     assert(addr < _cm->finger(), "invariant");
3428 
3429     statsOnly( _task->increase_objs_found_on_bitmap() );
3430     assert(addr >= _task->finger(), "invariant");
3431 
3432     // We move that task's local finger along.
3433     _task->move_finger_to(addr);
3434 
3435     _task->scan_object(oop(addr));
3436     // we only partially drain the local queue and global stack
3437     _task->drain_local_queue(true);
3438     _task->drain_global_stack(true);
3439 
3440     // if the has_aborted flag has been raised, we need to bail out of
3441     // the iteration
3442     return !_task->has_aborted();
3443   }
3444 };
3445 
3446 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3447                                ConcurrentMark* cm,
3448                                CMTask* task)
3449   : _g1h(g1h), _cm(cm), _task(task) {
3450   assert(_ref_processor == NULL, "should be initialized to NULL");
3451 
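       // If concurrent reference processing is enabled, discovered references
       // are routed to the concurrent marking reference processor; otherwise
       // _ref_processor stays NULL and no discovery is done by this closure.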
3452   if (G1UseConcMarkReferenceProcessing) {
3453     _ref_processor = g1h->ref_processor_cm();
3454     assert(_ref_processor != NULL, "should not be NULL");
3455   }
3456 }
3457 
3458 void CMTask::setup_for_region(HeapRegion* hr) {
3459   assert(hr != NULL,
3460         "claim_region() should have filtered out NULL regions");
3461   assert(!hr->is_continues_humongous(),
3462         "claim_region() should have filtered out continues humongous regions");
3463 
3464   if (_cm->verbose_low()) {
3465     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3466                            _worker_id, p2i(hr));
3467   }
3468 
3469   _curr_region  = hr;
3470   _finger       = hr->bottom();
3471   update_region_limit();
3472 }
3473 
3474 void CMTask::update_region_limit() {
3475   HeapRegion* hr            = _curr_region;
3476   HeapWord* bottom          = hr->bottom();
3477   HeapWord* limit           = hr->next_top_at_mark_start();
3478 
3479   if (limit == bottom) {
3480     if (_cm->verbose_low()) {
3481       gclog_or_tty->print_cr("[%u] found an empty region "
3482                              "["PTR_FORMAT", "PTR_FORMAT")",
3483                              _worker_id, p2i(bottom), p2i(limit));
3484     }
3485     // The region was collected underneath our feet.
3486     // We set the finger to bottom to ensure that the bitmap
3487     // iteration that will follow this will not do anything.
3488     // (this is not a condition that holds when we set the region up,
3489     // as the region is not supposed to be empty in the first place)
3490     _finger = bottom;
3491   } else if (limit >= _region_limit) {
3492     assert(limit >= _finger, "peace of mind");
3493   } else {
3494     assert(limit < _region_limit, "only way to get here");
3495     // This can happen under some pretty unusual circumstances.  An
3496     // evacuation pause empties the region underneath our feet (NTAMS
3497     // at bottom). We then do some allocation in the region (NTAMS
3498     // stays at bottom), followed by the region being used as a GC
3499     // alloc region (NTAMS will move to top() and the objects
3500     // originally below it will be grayed). All objects now marked in
3501     // the region are explicitly grayed, if below the global finger,
3502     // and in fact we do not need to scan anything else. So, we simply
3503     // set _finger to be limit to ensure that the bitmap iteration
3504     // doesn't do anything.
3505     _finger = limit;
3506   }
3507 
3508   _region_limit = limit;
3509 }
3510 
3511 void CMTask::giveup_current_region() {
3512   assert(_curr_region != NULL, "invariant");
3513   if (_cm->verbose_low()) {
3514     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3515                            _worker_id, p2i(_curr_region));
3516   }
3517   clear_region_fields();
3518 }
3519 
3520 void CMTask::clear_region_fields() {
3521   // Values for these three fields that indicate that we're not
3522   // holding on to a region.
3523   _curr_region   = NULL;
3524   _finger        = NULL;
3525   _region_limit  = NULL;
3526 }
3527 
3528 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3529   if (cm_oop_closure == NULL) {
3530     assert(_cm_oop_closure != NULL, "invariant");
3531   } else {
3532     assert(_cm_oop_closure == NULL, "invariant");
3533   }
3534   _cm_oop_closure = cm_oop_closure;
3535 }
3536 
3537 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3538   guarantee(nextMarkBitMap != NULL, "invariant");
3539 
3540   if (_cm->verbose_low()) {
3541     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3542   }
3543 
3544   _nextMarkBitMap                = nextMarkBitMap;
3545   clear_region_fields();
3546 
3547   _calls                         = 0;
3548   _elapsed_time_ms               = 0.0;
3549   _termination_time_ms           = 0.0;
3550   _termination_start_time_ms     = 0.0;
3551 
3552 #if _MARKING_STATS_
3553   _aborted                       = 0;
3554   _aborted_overflow              = 0;
3555   _aborted_cm_aborted            = 0;
3556   _aborted_yield                 = 0;
3557   _aborted_timed_out             = 0;
3558   _aborted_satb                  = 0;
3559   _aborted_termination           = 0;
3560   _steal_attempts                = 0;
3561   _steals                        = 0;
3562   _local_pushes                  = 0;
3563   _local_pops                    = 0;
3564   _local_max_size                = 0;
3565   _objs_scanned                  = 0;
3566   _global_pushes                 = 0;
3567   _global_pops                   = 0;
3568   _global_max_size               = 0;
3569   _global_transfers_to           = 0;
3570   _global_transfers_from         = 0;
3571   _regions_claimed               = 0;
3572   _objs_found_on_bitmap          = 0;
3573   _satb_buffers_processed        = 0;
3574 #endif // _MARKING_STATS_
3575 }
3576 
3577 bool CMTask::should_exit_termination() {
3578   regular_clock_call();
3579   // This is called when we are in the termination protocol. We should
3580   // quit if, for some reason, this task wants to abort or the global
3581   // stack is not empty (this means that we can get work from it).
3582   return !_cm->mark_stack_empty() || has_aborted();
3583 }
3584 
3585 void CMTask::reached_limit() {
3586   assert(_words_scanned >= _words_scanned_limit ||
3587          _refs_reached >= _refs_reached_limit,
3588          "shouldn't have been called otherwise");
3589   regular_clock_call();
3590 }
3591 
3592 void CMTask::regular_clock_call() {
3593   if (has_aborted()) return;
3594 
3595   // First, we need to recalculate the words scanned and refs reached
3596   // limits for the next clock call.
3597   recalculate_limits();
3598 
3599   // During the regular clock call we do the following:
3600 
3601   // (1) If an overflow has been flagged, then we abort.
3602   if (_cm->has_overflown()) {
3603     set_has_aborted();
3604     return;
3605   }
3606 
3607   // If we are not concurrent (i.e. we're doing remark) we don't need
3608   // to check anything else. The other steps are only needed during
3609   // the concurrent marking phase.
3610   if (!concurrent()) return;
3611 
3612   // (2) If marking has been aborted for Full GC, then we also abort.
3613   if (_cm->has_aborted()) {
3614     set_has_aborted();
3615     statsOnly( ++_aborted_cm_aborted );
3616     return;
3617   }
3618 
3619   double curr_time_ms = os::elapsedVTime() * 1000.0;
3620 
3621   // (3) If marking stats are enabled, then we update the step history.
3622 #if _MARKING_STATS_
3623   if (_words_scanned >= _words_scanned_limit) {
3624     ++_clock_due_to_scanning;
3625   }
3626   if (_refs_reached >= _refs_reached_limit) {
3627     ++_clock_due_to_marking;
3628   }
3629 
3630   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3631   _interval_start_time_ms = curr_time_ms;
3632   _all_clock_intervals_ms.add(last_interval_ms);
3633 
3634   if (_cm->verbose_medium()) {
3635       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3636                         "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
3637                         _worker_id, last_interval_ms,
3638                         _words_scanned,
3639                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3640                         _refs_reached,
3641                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3642   }
3643 #endif // _MARKING_STATS_
3644 
3645   // (4) We check whether we should yield. If we have to, then we abort.
3646   if (SuspendibleThreadSet::should_yield()) {
3647     // We should yield. To do this we abort the task. The caller is
3648     // responsible for yielding.
3649     set_has_aborted();
3650     statsOnly( ++_aborted_yield );
3651     return;
3652   }
3653 
3654   // (5) We check whether we've reached our time quota. If we have,
3655   // then we abort.
3656   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3657   if (elapsed_time_ms > _time_target_ms) {
3658     set_has_aborted();
3659     _has_timed_out = true;
3660     statsOnly( ++_aborted_timed_out );
3661     return;
3662   }
3663 
3664   // (6) Finally, we check whether there are enough completed SATB
3665   // buffers available for processing. If there are, we abort.
3666   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3667   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3668     if (_cm->verbose_low()) {
3669       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3670                              _worker_id);
3671     }
3672     // we do need to process SATB buffers, we'll abort and restart
3673     // the marking task to do so
3674     set_has_aborted();
3675     statsOnly( ++_aborted_satb );
3676     return;
3677   }
3678 }
3679 
3680 void CMTask::recalculate_limits() {
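       // Move both limits a full period beyond the current counts; the regular
       // clock fires again once either limit is reached. decrease_limits() may
       // later pull the limits in to force an earlier clock call.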
3681   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3682   _words_scanned_limit      = _real_words_scanned_limit;
3683 
3684   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3685   _refs_reached_limit       = _real_refs_reached_limit;
3686 }
3687 
3688 void CMTask::decrease_limits() {
3689   // This is called when we believe that we're going to do an infrequent
3690   // operation which will increase the per byte scanned cost (i.e. move
3691   // entries to/from the global stack). It basically tries to decrease the
3692   // scanning limit so that the clock is called earlier.
3693 
3694   if (_cm->verbose_medium()) {
3695     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3696   }
3697 
3698   _words_scanned_limit = _real_words_scanned_limit -
3699     3 * words_scanned_period / 4;
3700   _refs_reached_limit  = _real_refs_reached_limit -
3701     3 * refs_reached_period / 4;
3702 }
3703 
3704 void CMTask::move_entries_to_global_stack() {
3705   // local array where we'll store the entries that will be popped
3706   // from the local queue
3707   oop buffer[global_stack_transfer_size];
3708 
3709   int n = 0;
3710   oop obj;
3711   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3712     buffer[n] = obj;
3713     ++n;
3714   }
3715 
3716   if (n > 0) {
3717     // we popped at least one entry from the local queue
3718 
3719     statsOnly( ++_global_transfers_to; _local_pops += n );
3720 
3721     if (!_cm->mark_stack_push(buffer, n)) {
3722       if (_cm->verbose_low()) {
3723         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3724                                _worker_id);
3725       }
3726       set_has_aborted();
3727     } else {
3728       // the transfer was successful
3729 
3730       if (_cm->verbose_medium()) {
3731         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3732                                _worker_id, n);
3733       }
3734       statsOnly( size_t tmp_size = _cm->mark_stack_size();
3735                  if (tmp_size > _global_max_size) {
3736                    _global_max_size = tmp_size;
3737                  }
3738                  _global_pushes += n );
3739     }
3740   }
3741 
3742   // this operation was quite expensive, so decrease the limits
3743   decrease_limits();
3744 }
3745 
3746 void CMTask::get_entries_from_global_stack() {
3747   // local array where we'll store the entries that will be popped
3748   // from the global stack.
3749   oop buffer[global_stack_transfer_size];
3750   int n;
3751   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3752   assert(n <= global_stack_transfer_size,
3753          "we should not pop more than the given limit");
3754   if (n > 0) {
3755     // yes, we did actually pop at least one entry
3756 
3757     statsOnly( ++_global_transfers_from; _global_pops += n );
3758     if (_cm->verbose_medium()) {
3759       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3760                              _worker_id, n);
3761     }
3762     for (int i = 0; i < n; ++i) {
3763       bool success = _task_queue->push(buffer[i]);
3764       // We only call this when the local queue is empty or under a
3765       // given target limit. So, we do not expect this push to fail.
3766       assert(success, "invariant");
3767     }
3768 
3769     statsOnly( size_t tmp_size = (size_t)_task_queue->size();
3770                if (tmp_size > _local_max_size) {
3771                  _local_max_size = tmp_size;
3772                }
3773                _local_pushes += n );
3774   }
3775 
3776   // this operation was quite expensive, so decrease the limits
3777   decrease_limits();
3778 }
3779 
3780 void CMTask::drain_local_queue(bool partially) {
3781   if (has_aborted()) return;
3782 
3783   // Decide what the target size is, depending on whether we're going to
3784   // drain it partially (so that other tasks can steal if they run out
3785   // of things to do) or totally (at the very end).
3786   size_t target_size;
3787   if (partially) {
3788     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3789   } else {
3790     target_size = 0;
3791   }
3792 
3793   if (_task_queue->size() > target_size) {
3794     if (_cm->verbose_high()) {
3795       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3796                              _worker_id, target_size);
3797     }
3798 
3799     oop obj;
3800     bool ret = _task_queue->pop_local(obj);
3801     while (ret) {
3802       statsOnly( ++_local_pops );
3803 
3804       if (_cm->verbose_high()) {
3805         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3806                                p2i((void*) obj));
3807       }
3808 
3809       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3810       assert(!_g1h->is_on_master_free_list(
3811                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3812 
3813       scan_object(obj);
3814 
3815       if (_task_queue->size() <= target_size || has_aborted()) {
3816         ret = false;
3817       } else {
3818         ret = _task_queue->pop_local(obj);
3819       }
3820     }
3821 
3822     if (_cm->verbose_high()) {
3823       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3824                              _worker_id, _task_queue->size());
3825     }
3826   }
3827 }
3828 
3829 void CMTask::drain_global_stack(bool partially) {
3830   if (has_aborted()) return;
3831 
3832   // We have a policy to drain the local queue before we attempt to
3833   // drain the global stack.
3834   assert(partially || _task_queue->size() == 0, "invariant");
3835 
3836   // Decide what the target size is, depending on whether we're going to
3837   // drain it partially (so that other tasks can steal if they run out
3838   // of things to do) or totally (at the very end).  Notice that,
3839   // because we move entries from the global stack in chunks or
3840   // because another task might be doing the same, we might in fact
3841   // drop below the target. But this is not a problem.
3842   size_t target_size;
3843   if (partially) {
3844     target_size = _cm->partial_mark_stack_size_target();
3845   } else {
3846     target_size = 0;
3847   }
3848 
3849   if (_cm->mark_stack_size() > target_size) {
3850     if (_cm->verbose_low()) {
3851       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3852                              _worker_id, target_size);
3853     }
3854 
3855     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3856       get_entries_from_global_stack();
3857       drain_local_queue(partially);
3858     }
3859 
3860     if (_cm->verbose_low()) {
3861       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3862                              _worker_id, _cm->mark_stack_size());
3863     }
3864   }
3865 }
3866 
3867 // SATB Queue has several assumptions on whether to call the par or
3868 // non-par versions of the methods. This is why some of the code is
3869 // replicated. We should really get rid of the single-threaded version
3870 // of the code to simplify things.
3871 void CMTask::drain_satb_buffers() {
3872   if (has_aborted()) return;
3873 
3874   // We set this so that the regular clock knows that we're in the
3875   // middle of draining buffers and doesn't set the abort flag when it
3876   // notices that SATB buffers are available for draining. It'd be
3877   // very counterproductive if it did that. :-)
3878   _draining_satb_buffers = true;
3879 
3880   CMObjectClosure oc(this);
3881   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3882   satb_mq_set.set_closure(_worker_id, &oc);
3883 
3884   // This keeps claiming and applying the closure to completed buffers
3885   // until we run out of buffers or we need to abort.
3886   while (!has_aborted() &&
3887          satb_mq_set.apply_closure_to_completed_buffer(_worker_id)) {
3888     if (_cm->verbose_medium()) {
3889       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3890     }
3891     statsOnly( ++_satb_buffers_processed );
3892     regular_clock_call();
3893   }
3894 
3895   _draining_satb_buffers = false;
3896 
3897   assert(has_aborted() ||
3898          concurrent() ||
3899          satb_mq_set.completed_buffers_num() == 0, "invariant");
3900 
3901   satb_mq_set.set_closure(_worker_id, NULL);
3902 
3903   // again, this was a potentially expensive operation, decrease the
3904   // limits to get the regular clock call early
3905   decrease_limits();
3906 }
3907 
3908 void CMTask::print_stats() {
3909   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3910                          _worker_id, _calls);
3911   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3912                          _elapsed_time_ms, _termination_time_ms);
3913   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3914                          _step_times_ms.num(), _step_times_ms.avg(),
3915                          _step_times_ms.sd());
3916   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3917                          _step_times_ms.maximum(), _step_times_ms.sum());
3918 
3919 #if _MARKING_STATS_
3920   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3921                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3922                          _all_clock_intervals_ms.sd());
3923   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3924                          _all_clock_intervals_ms.maximum(),
3925                          _all_clock_intervals_ms.sum());
3926   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
3927                          _clock_due_to_scanning, _clock_due_to_marking);
3928   gclog_or_tty->print_cr("  Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
3929                          _objs_scanned, _objs_found_on_bitmap);
3930   gclog_or_tty->print_cr("  Local Queue:  pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3931                          _local_pushes, _local_pops, _local_max_size);
3932   gclog_or_tty->print_cr("  Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3933                          _global_pushes, _global_pops, _global_max_size);
3934   gclog_or_tty->print_cr("                transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
3935                          _global_transfers_to, _global_transfers_from);
3936   gclog_or_tty->print_cr("  Regions: claimed = " SIZE_FORMAT, _regions_claimed);
3937   gclog_or_tty->print_cr("  SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
3938   gclog_or_tty->print_cr("  Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
3939                          _steal_attempts, _steals);
3940   gclog_or_tty->print_cr("  Aborted: " SIZE_FORMAT ", due to", _aborted);
3941   gclog_or_tty->print_cr("    overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
3942                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3943   gclog_or_tty->print_cr("    time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
3944                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3945 #endif // _MARKING_STATS_
3946 }
3947 
3948 /*****************************************************************************
3949 
3950     The do_marking_step(time_target_ms, ...) method is the building
3951     block of the parallel marking framework. It can be called in parallel
3952     with other invocations of do_marking_step() on different tasks
3953     (but only one per task, obviously) and concurrently with the
3954     mutator threads, or during remark, hence it eliminates the need
3955     for two versions of the code. When called during remark, it will
3956     pick up from where the task left off during the concurrent marking
3957     phase. Interestingly, tasks are also claimable during evacuation
3958     pauses, since do_marking_step() ensures that it aborts before
3959     it needs to yield.
3960 
3961     The data structures that it uses to do marking work are the
3962     following:
3963 
3964       (1) Marking Bitmap. If there are gray objects that appear only
3965       on the bitmap (this happens either when dealing with an overflow
3966       or when the initial marking phase has simply marked the roots
3967       and didn't push them on the stack), then tasks claim heap
3968       regions whose bitmap they then scan to find gray objects. A
3969       global finger indicates where the end of the last claimed region
3970       is. A local finger indicates how far into the region a task has
3971       scanned. The two fingers are used to determine how to gray an
3972       object (i.e. whether simply marking it is OK, as it will be
3973       visited by a task in the future, or whether it needs to be also
3974       pushed on a stack).
3975 
3976       (2) Local Queue. The local queue of the task which is accessed
3977       reasonably efficiently by the task. Other tasks can steal from
3978       it when they run out of work. Throughout the marking phase, a
3979       task attempts to keep its local queue short but not totally
3980       empty, so that entries are available for stealing by other
3981       tasks. Only when there is no more work, a task will totally
3982       drain its local queue.
3983 
3984       (3) Global Mark Stack. This handles local queue overflow. During
3985       marking only sets of entries are moved between it and the local
3986       queues, as access to it requires a mutex and more fine-grain
3987       interaction with it which might cause contention. If it
3988       overflows, then the marking phase should restart and iterate
3989       over the bitmap to identify gray objects. Throughout the marking
3990       phase, tasks attempt to keep the global mark stack at a small
3991       length but not totally empty, so that entries are available for
3992       popping by other tasks. Only when there is no more work, tasks
3993       will totally drain the global mark stack.
3994 
3995       (4) SATB Buffer Queue. This is where completed SATB buffers are
3996       made available. Buffers are regularly removed from this queue
3997       and scanned for roots, so that the queue doesn't get too
3998       long. During remark, all completed buffers are processed, as
3999       well as the filled in parts of any uncompleted buffers.
4000 
4001     The do_marking_step() method tries to abort when the time target
4002     has been reached. There are a few other cases when the
4003     do_marking_step() method also aborts:
4004 
4005       (1) When the marking phase has been aborted (after a Full GC).
4006 
4007       (2) When a global overflow (on the global stack) has been
4008       triggered. Before the task aborts, it will actually sync up with
4009       the other tasks to ensure that all the marking data structures
4010       (local queues, stacks, fingers etc.)  are re-initialized so that
4011       when do_marking_step() completes, the marking phase can
4012       immediately restart.
4013 
4014       (3) When enough completed SATB buffers are available. The
4015       do_marking_step() method only tries to drain SATB buffers right
4016       at the beginning. So, if enough buffers are available, the
4017       marking step aborts and the SATB buffers are processed at
4018       the beginning of the next invocation.
4019 
4020       (4) To yield. When we have to yield, we abort and yield
4021       right at the end of do_marking_step(). This saves us from a lot
4022       of hassle as, by yielding we might allow a Full GC. If this
4023       happens then objects will be compacted underneath our feet, the
4024       heap might shrink, etc. We save checking for this by just
4025       aborting and doing the yield right at the end.
4026 
4027     From the above it follows that the do_marking_step() method should
4028     be called in a loop (or, otherwise, regularly) until it completes.
4029 
4030     If a marking step completes without its has_aborted() flag being
4031     true, it means it has completed the current marking phase (and
4032     also all other marking tasks have done so and have all synced up).
4033 
4034     A method called regular_clock_call() is invoked "regularly" (in
4035     sub ms intervals) throughout marking. It is this clock method that
4036     checks all the abort conditions which were mentioned above and
4037     decides when the task should abort. A work-based scheme is used to
4038     trigger this clock method: when the number of object words the
4039     marking phase has scanned or the number of references the marking
4040     phase has visited reach a given limit. Additional invocations of
4041     the clock method have been planted in a few other strategic places
4042     too. The initial reason for the clock method was to avoid calling
4043     vtime too regularly, as it is quite expensive. So, once it was in
4044     place, it was natural to piggy-back all the other conditions on it
4045     too and not constantly check them throughout the code.
4046 
4047     If do_termination is true then do_marking_step will enter its
4048     termination protocol.
4049 
4050     The value of is_serial must be true when do_marking_step is being
4051     called serially (i.e. by the VMThread) and do_marking_step should
4052     skip any synchronization in the termination and overflow code.
4053     Examples include the serial remark code and the serial reference
4054     processing closures.
4055 
4056     The value of is_serial must be false when do_marking_step is
4057     being called by any of the worker threads in a work gang.
4058     Examples include the concurrent marking code (CMMarkingTask),
4059     the MT remark code, and the MT reference processing closures.
4060 
4061  *****************************************************************************/
4062 
4063 void CMTask::do_marking_step(double time_target_ms,
4064                              bool do_termination,
4065                              bool is_serial) {
4066   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4067   assert(concurrent() == _cm->concurrent(), "they should be the same");
4068 
4069   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4070   assert(_task_queues != NULL, "invariant");
4071   assert(_task_queue != NULL, "invariant");
4072   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4073 
4074   assert(!_claimed,
4075          "only one thread should claim this task at any one time");
4076 
4077   // OK, this doesn't safeguard against all possible scenarios, as it is
4078   // possible for two threads to set the _claimed flag at the same
4079   // time. But it is only for debugging purposes anyway and it will
4080   // catch most problems.
4081   _claimed = true;
4082 
4083   _start_time_ms = os::elapsedVTime() * 1000.0;
4084   statsOnly( _interval_start_time_ms = _start_time_ms );
4085 
4086   // If do_stealing is true then do_marking_step will attempt to
4087   // steal work from the other CMTasks. It only makes sense to
4088   // enable stealing when the termination protocol is enabled
4089   // and do_marking_step() is not being called serially.
4090   bool do_stealing = do_termination && !is_serial;
4091 
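       // Shave the predicted overshoot (based on previous marking steps) off
       // the target so that, on average, the step finishes within its budget.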
4092   double diff_prediction_ms =
4093     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4094   _time_target_ms = time_target_ms - diff_prediction_ms;
4095 
4096   // set up the variables that are used in the work-based scheme to
4097   // call the regular clock method
4098   _words_scanned = 0;
4099   _refs_reached  = 0;
4100   recalculate_limits();
4101 
4102   // clear all flags
4103   clear_has_aborted();
4104   _has_timed_out = false;
4105   _draining_satb_buffers = false;
4106 
4107   ++_calls;
4108 
4109   if (_cm->verbose_low()) {
4110     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4111                            "target = %1.2lfms >>>>>>>>>>",
4112                            _worker_id, _calls, _time_target_ms);
4113   }
4114 
4115   // Set up the bitmap and oop closures. Anything that uses them is
4116   // eventually called from this method, so it is OK to allocate these
4117   // statically.
4118   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4119   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4120   set_cm_oop_closure(&cm_oop_closure);
4121 
4122   if (_cm->has_overflown()) {
4123     // This can happen if the mark stack overflows during a GC pause
4124     // and this task, after a yield point, restarts. We have to abort
4125     // as we need to get into the overflow protocol which happens
4126     // right at the end of this task.
4127     set_has_aborted();
4128   }
4129 
4130   // First drain any available SATB buffers. After this, we will not
4131   // look at SATB buffers before the next invocation of this method.
4132   // If enough completed SATB buffers are queued up, the regular clock
4133   // will abort this task so that it restarts.
4134   drain_satb_buffers();
4135   // ...then partially drain the local queue and the global stack
4136   drain_local_queue(true);
4137   drain_global_stack(true);
4138 
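  // Main region-scanning loop: repeatedly scan the region we are currently
  // holding (if any), partially drain the local queue and the global stack,
  // and try to claim a new region, until we either run out of regions or
  // this task aborts.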
4139   do {
4140     if (!has_aborted() && _curr_region != NULL) {
4141       // This means that we're already holding on to a region.
4142       assert(_finger != NULL, "if region is not NULL, then the finger "
4143              "should not be NULL either");
4144 
4145       // We might have restarted this task after an evacuation pause
4146       // which might have evacuated the region we're holding on to
4147       // underneath our feet. Let's read its limit again to make sure
4148       // that we do not iterate over a region of the heap that
4149       // contains garbage (update_region_limit() will also move
4150       // _finger to the start of the region if it is found empty).
4151       update_region_limit();
4152       // We will start from _finger not from the start of the region,
4153       // as we might be restarting this task after aborting half-way
4154       // through scanning this region. In this case, _finger points to
4155       // the address where we last found a marked object. If this is a
4156       // fresh region, _finger points to start().
4157       MemRegion mr = MemRegion(_finger, _region_limit);
4158 
4159       if (_cm->verbose_low()) {
4160         gclog_or_tty->print_cr("[%u] we're scanning part "
4161                                "["PTR_FORMAT", "PTR_FORMAT") "
4162                                "of region "HR_FORMAT,
4163                                _worker_id, p2i(_finger), p2i(_region_limit),
4164                                HR_FORMAT_PARAMS(_curr_region));
4165       }
4166 
4167       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
4168              "humongous regions should go around loop once only");
4169 
4170       // Some special cases:
4171       // If the memory region is empty, we can just give up the region.
4172       // If the current region is humongous then we only need to check
4173       // the bitmap for the bit associated with the start of the object,
4174       // scan the object if it's live, and give up the region.
4175       // Otherwise, let's iterate over the bitmap of the part of the region
4176       // that is left.
4177       // If the iteration is successful, give up the region.
4178       if (mr.is_empty()) {
4179         giveup_current_region();
4180         regular_clock_call();
4181       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
4182         if (_nextMarkBitMap->isMarked(mr.start())) {
4183           // The object is marked - apply the closure
4184           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4185           bitmap_closure.do_bit(offset);
4186         }
4187         // Even if this task aborted while scanning the humongous object
4188         // we can (and should) give up the current region.
4189         giveup_current_region();
4190         regular_clock_call();
4191       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4192         giveup_current_region();
4193         regular_clock_call();
4194       } else {
4195         assert(has_aborted(), "currently the only way to do so");
4196         // The only way to abort the bitmap iteration is to return
4197         // false from the do_bit() method. However, inside the
4198         // do_bit() method we move the _finger to point to the
4199         // object currently being looked at. So, if we bail out, we
4200         // have definitely set _finger to something non-null.
4201         assert(_finger != NULL, "invariant");
4202 
4203         // Region iteration was actually aborted. So now _finger
4204         // points to the address of the object we last scanned. If we
4205         // leave it there, when we restart this task, we will rescan
4206         // the object. It is easy to avoid this. We move the finger by
4207         // enough to point to the next possible object header (the
4208         // bitmap knows by how much we need to move it as it knows its
4209         // granularity).
4210         assert(_finger < _region_limit, "invariant");
4211         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4212         // Check if bitmap iteration was aborted while scanning the last object
4213         if (new_finger >= _region_limit) {
4214           giveup_current_region();
4215         } else {
4216           move_finger_to(new_finger);
4217         }
4218       }
4219     }
4220     // At this point we have either completed iterating over the
4221     // region we were holding on to, or we have aborted.
4222 
4223     // We then partially drain the local queue and the global stack.
4224     // (Do we really need this?)
4225     drain_local_queue(true);
4226     drain_global_stack(true);
4227 
4228     // Read the note on the claim_region() method on why it might
4229     // return NULL with potentially more regions available for
4230     // claiming and why we have to check out_of_regions() to determine
4231     // whether we're done or not.
4232     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4233       // We are going to try to claim a new region. We should have
4234       // given up on the previous one.
4235       // Separated the asserts so that we know which one fires.
4236       assert(_curr_region  == NULL, "invariant");
4237       assert(_finger       == NULL, "invariant");
4238       assert(_region_limit == NULL, "invariant");
4239       if (_cm->verbose_low()) {
4240         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4241       }
4242       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4243       if (claimed_region != NULL) {
4244         // Yes, we managed to claim one
4245         statsOnly( ++_regions_claimed );
4246 
4247         if (_cm->verbose_low()) {
4248           gclog_or_tty->print_cr("[%u] we successfully claimed "
4249                                  "region "PTR_FORMAT,
4250                                  _worker_id, p2i(claimed_region));
4251         }
4252 
4253         setup_for_region(claimed_region);
4254         assert(_curr_region == claimed_region, "invariant");
4255       }
4256       // It is important to call the regular clock here. It might take
4257       // a while to claim a region if, for example, we hit a large
4258       // block of empty regions. So we need to call the regular clock
4259       // method once round the loop to make sure it's called
4260       // frequently enough.
4261       regular_clock_call();
4262     }
4263 
4264     if (!has_aborted() && _curr_region == NULL) {
4265       assert(_cm->out_of_regions(),
4266              "at this point we should be out of regions");
4267     }
4268   } while (_curr_region != NULL && !has_aborted());
4269 
4270   if (!has_aborted()) {
4271     // We cannot check whether the global stack is empty, since other
4272     // tasks might be pushing objects to it concurrently.
4273     assert(_cm->out_of_regions(),
4274            "at this point we should be out of regions");
4275 
4276     if (_cm->verbose_low()) {
4277       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4278     }
4279 
4280     // Try to reduce the number of available SATB buffers so that
4281     // remark has less work to do.
4282     drain_satb_buffers();
4283   }
4284 
4285   // Since we've done everything else, we can now totally drain the
4286   // local queue and global stack.
4287   drain_local_queue(false);
4288   drain_global_stack(false);
4289 
4290   // Attempt at work stealing from other task's queues.
4291   if (do_stealing && !has_aborted()) {
4292     // We have not aborted. This means that we have finished all that
4293     // we could. Let's try to do some stealing...
4294 
4295     // We cannot check whether the global stack is empty, since other
4296     // tasks might be pushing objects to it concurrently.
4297     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4298            "only way to reach here");
4299 
4300     if (_cm->verbose_low()) {
4301       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4302     }
4303 
4304     while (!has_aborted()) {
4305       oop obj;
4306       statsOnly( ++_steal_attempts );
4307 
4308       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4309         if (_cm->verbose_medium()) {
4310           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4311                                  _worker_id, p2i((void*) obj));
4312         }
4313 
4314         statsOnly( ++_steals );
4315 
4316         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4317                "any stolen object should be marked");
4318         scan_object(obj);
4319 
4320         // And since we're towards the end, let's totally drain the
4321         // local queue and global stack.
4322         drain_local_queue(false);
4323         drain_global_stack(false);
4324       } else {
4325         break;
4326       }
4327     }
4328   }
4329 
4330   // If we are about to wrap up and go into termination, check if we
4331   // should raise the overflow flag.
4332   if (do_termination && !has_aborted()) {
4333     if (_cm->force_overflow()->should_force()) {
4334       _cm->set_has_overflown();
4335       regular_clock_call();
4336     }
4337   }
4338 
4339   // We still haven't aborted. Now, let's try to get into the
4340   // termination protocol.
4341   if (do_termination && !has_aborted()) {
4342     // We cannot check whether the global stack is empty, since other
4343     // tasks might be concurrently pushing objects on it.
4344     // Separated the asserts so that we know which one fires.
4345     assert(_cm->out_of_regions(), "only way to reach here");
4346     assert(_task_queue->size() == 0, "only way to reach here");
4347 
4348     if (_cm->verbose_low()) {
4349       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4350     }
4351 
4352     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4353 
4354     // The CMTask class also extends the TerminatorTerminator class,
4355     // so its should_exit_termination() method will also be consulted
4356     // when deciding whether to exit the termination protocol.
4357     bool finished = (is_serial ||
4358                      _cm->terminator()->offer_termination(this));
4359     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4360     _termination_time_ms +=
4361       termination_end_time_ms - _termination_start_time_ms;
4362 
4363     if (finished) {
4364       // We're all done.
4365 
4366       if (_worker_id == 0) {
4367         // let's allow task 0 to do this
4368         if (concurrent()) {
4369           assert(_cm->concurrent_marking_in_progress(), "invariant");
4370           // we need to set this to false before the next
4371           // safepoint. This way we ensure that the marking phase
4372           // doesn't observe any more heap expansions.
4373           _cm->clear_concurrent_marking_in_progress();
4374         }
4375       }
4376 
4377       // We can now guarantee that the global stack is empty, since
4378       // all other tasks have finished. We separated the guarantees so
4379       // that, if a condition is false, we can immediately find out
4380       // which one.
4381       guarantee(_cm->out_of_regions(), "only way to reach here");
4382       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4383       guarantee(_task_queue->size() == 0, "only way to reach here");
4384       guarantee(!_cm->has_overflown(), "only way to reach here");
4385       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4386 
4387       if (_cm->verbose_low()) {
4388         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4389       }
4390     } else {
4391       // Apparently there's more work to do. Let's abort this task; the
4392       // caller will restart it and we can hopefully find more things to do.
4393 
4394       if (_cm->verbose_low()) {
4395         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4396                                _worker_id);
4397       }
4398 
4399       set_has_aborted();
4400       statsOnly( ++_aborted_termination );
4401     }
4402   }
4403 
4404   // Mainly for debugging purposes to make sure that a pointer to the
4405   // closure which was allocated in this frame doesn't
4406   // escape it by accident.
4407   set_cm_oop_closure(NULL);
4408   double end_time_ms = os::elapsedVTime() * 1000.0;
4409   double elapsed_time_ms = end_time_ms - _start_time_ms;
4410   // Update the step history.
4411   _step_times_ms.add(elapsed_time_ms);
4412 
4413   if (has_aborted()) {
4414     // The task was aborted for some reason.
4415 
4416     statsOnly( ++_aborted );
4417 
4418     if (_has_timed_out) {
4419       double diff_ms = elapsed_time_ms - _time_target_ms;
4420       // Keep statistics of how well we did with respect to hitting
4421       // our target only if we actually timed out (if we aborted for
4422       // other reasons, then the results might get skewed).
4423       _marking_step_diffs_ms.add(diff_ms);
4424     }
4425 
4426     if (_cm->has_overflown()) {
4427       // This is the interesting one. We aborted because a global
4428       // overflow was raised. This means we have to restart the
4429       // marking phase and start iterating over regions. However, in
4430       // order to do this we have to make sure that all tasks stop
4431       // what they are doing and re-initialize in a safe manner. We
4432       // will achieve this with the use of two barrier sync points.
4433 
4434       if (_cm->verbose_low()) {
4435         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4436       }
4437 
4438       if (!is_serial) {
4439         // We only need to enter the sync barrier if being called
4440         // from a parallel context
4441         _cm->enter_first_sync_barrier(_worker_id);
4442 
4443         // When we exit this sync barrier we know that all tasks have
4444         // stopped doing marking work. So, it's now safe to
4445         // re-initialize our data structures. At the end of this method,
4446         // task 0 will clear the global data structures.
4447       }
4448 
4449       statsOnly( ++_aborted_overflow );
4450 
4451       // We clear the local state of this task...
4452       clear_region_fields();
4453 
4454       if (!is_serial) {
4455         // ...and enter the second barrier.
4456         _cm->enter_second_sync_barrier(_worker_id);
4457       }
4458       // At this point, if we're during the concurrent phase of
4459       // marking, everything has been re-initialized and we're
4460       // ready to restart.
4461     }
4462 
4463     if (_cm->verbose_low()) {
4464       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4465                              "elapsed = %1.2lfms <<<<<<<<<<",
4466                              _worker_id, _time_target_ms, elapsed_time_ms);
4467       if (_cm->has_aborted()) {
4468         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4469                                _worker_id);
4470       }
4471     }
4472   } else {
4473     if (_cm->verbose_low()) {
4474       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4475                              "elapsed = %1.2lfms <<<<<<<<<<",
4476                              _worker_id, _time_target_ms, elapsed_time_ms);
4477     }
4478   }
4479 
4480   _claimed = false;
4481 }
4482 
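// The constructor only records the per-worker data structures handed to it
// (task queue, task queue set, liveness accounting arrays). Note that
// _nextMarkBitMap is left NULL here and is expected to be set up elsewhere
// before marking starts.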
4483 CMTask::CMTask(uint worker_id,
4484                ConcurrentMark* cm,
4485                size_t* marked_bytes,
4486                BitMap* card_bm,
4487                CMTaskQueue* task_queue,
4488                CMTaskQueueSet* task_queues)
4489   : _g1h(G1CollectedHeap::heap()),
4490     _worker_id(worker_id), _cm(cm),
4491     _claimed(false),
4492     _nextMarkBitMap(NULL), _hash_seed(17),
4493     _task_queue(task_queue),
4494     _task_queues(task_queues),
4495     _cm_oop_closure(NULL),
4496     _marked_bytes_array(marked_bytes),
4497     _card_bm(card_bm) {
4498   guarantee(task_queue != NULL, "invariant");
4499   guarantee(task_queues != NULL, "invariant");
4500 
4501   statsOnly( _clock_due_to_scanning = 0;
4502              _clock_due_to_marking  = 0 );
4503 
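  // Seed the step-time difference predictor with a small initial sample so
  // that get_new_prediction(&_marking_step_diffs_ms) in do_marking_step()
  // has a value to work with before any real samples have been recorded.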
4504   _marking_step_diffs_ms.add(0.5);
4505 }
4506 
4507 // These are formatting macros that are used below to ensure
4508 // consistent formatting. The *_H_* versions are used to format the
4509 // header for a particular value and they should be kept consistent
4510 // with the corresponding macro. Also note that most of the macros add
4511 // the necessary white space (as a prefix) which makes them a bit
4512 // easier to compose.
4513 
4514 // All the output lines are prefixed with this string to be able to
4515 // identify them easily in a large log file.
4516 #define G1PPRL_LINE_PREFIX            "###"
4517 
4518 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4519 #ifdef _LP64
4520 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4521 #else // _LP64
4522 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4523 #endif // _LP64
4524 
4525 // For per-region info
4526 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4527 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4528 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4529 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4530 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4531 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4532 
4533 // For summary info
4534 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4535 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4536 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4537 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
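
// A rough sketch of the log shape these macros produce (values and column
// widths are illustrative only; the exact layout follows the format strings
// above and the print calls below):
//
//   ### PHASE <phase-name> @ <time>
//   ### HEAP  reserved: <from>-<to>  region-size: <bytes>
//   ###   type  address-range  used  prev-live  next-live  gc-eff  remset  code-roots
//   ###   ...one line per region...
//   ###  SUMMARY  capacity: <MB> MB  used: <MB> MB / <perc> %  ...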
4538 
4539 G1PrintRegionLivenessInfoClosure::
4540 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4541   : _out(out),
4542     _total_used_bytes(0), _total_capacity_bytes(0),
4543     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4544     _hum_used_bytes(0), _hum_capacity_bytes(0),
4545     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4546     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4547   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4548   MemRegion g1_reserved = g1h->g1_reserved();
4549   double now = os::elapsedTime();
4550 
4551   // Print the header of the output.
4552   _out->cr();
4553   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4554   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4555                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4556                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4557                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4558                  HeapRegion::GrainBytes);
4559   _out->print_cr(G1PPRL_LINE_PREFIX);
4560   _out->print_cr(G1PPRL_LINE_PREFIX
4561                 G1PPRL_TYPE_H_FORMAT
4562                 G1PPRL_ADDR_BASE_H_FORMAT
4563                 G1PPRL_BYTE_H_FORMAT
4564                 G1PPRL_BYTE_H_FORMAT
4565                 G1PPRL_BYTE_H_FORMAT
4566                 G1PPRL_DOUBLE_H_FORMAT
4567                 G1PPRL_BYTE_H_FORMAT
4568                 G1PPRL_BYTE_H_FORMAT,
4569                 "type", "address-range",
4570                 "used", "prev-live", "next-live", "gc-eff",
4571                 "remset", "code-roots");
4572   _out->print_cr(G1PPRL_LINE_PREFIX
4573                 G1PPRL_TYPE_H_FORMAT
4574                 G1PPRL_ADDR_BASE_H_FORMAT
4575                 G1PPRL_BYTE_H_FORMAT
4576                 G1PPRL_BYTE_H_FORMAT
4577                 G1PPRL_BYTE_H_FORMAT
4578                 G1PPRL_DOUBLE_H_FORMAT
4579                 G1PPRL_BYTE_H_FORMAT
4580                 G1PPRL_BYTE_H_FORMAT,
4581                 "", "",
4582                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4583                 "(bytes)", "(bytes)");
4584 }
4585 
4586 // It takes as a parameter a reference to one of the _hum_* fields,
4587 // deduces the corresponding value for a region in a humongous region
4588 // series (either the region size, or what's left if the _hum_* field
4589 // is < the region size), and updates the _hum_* field accordingly.
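// For example (illustrative numbers only): with a 1 MB region size and
// *hum_bytes initially at 2.5 MB, three consecutive calls return 1 MB,
// 1 MB and 0.5 MB, leaving *hum_bytes at zero; further calls return 0.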
4590 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4591   size_t bytes = 0;
4592   // The > 0 check is to deal with the prev and next live bytes which
4593   // could be 0.
4594   if (*hum_bytes > 0) {
4595     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4596     *hum_bytes -= bytes;
4597   }
4598   return bytes;
4599 }
4600 
4601 // It deduces the values for a region in a humongous region series
4602 // from the _hum_* fields and updates those accordingly. It assumes
4603 // that the _hum_* fields have already been set up from the "starts
4604 // humongous" region and that we visit the regions in address order.
4605 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4606                                                      size_t* capacity_bytes,
4607                                                      size_t* prev_live_bytes,
4608                                                      size_t* next_live_bytes) {
4609   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4610   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4611   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4612   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4613   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4614 }
4615 
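// Visits a single region: accumulates this region's values into the running
// totals, handles humongous series via the _hum_* fields above, prints one
// per-region line, and returns false so that the heap iteration continues.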
4616 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4617   const char* type       = r->get_type_str();
4618   HeapWord* bottom       = r->bottom();
4619   HeapWord* end          = r->end();
4620   size_t capacity_bytes  = r->capacity();
4621   size_t used_bytes      = r->used();
4622   size_t prev_live_bytes = r->live_bytes();
4623   size_t next_live_bytes = r->next_live_bytes();
4624   double gc_eff          = r->gc_efficiency();
4625   size_t remset_bytes    = r->rem_set()->mem_size();
4626   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4627 
4628   if (r->is_starts_humongous()) {
4629     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4630            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4631            "they should have been zeroed after the last time we used them");
4632     // Set up the _hum_* fields.
4633     _hum_capacity_bytes  = capacity_bytes;
4634     _hum_used_bytes      = used_bytes;
4635     _hum_prev_live_bytes = prev_live_bytes;
4636     _hum_next_live_bytes = next_live_bytes;
4637     get_hum_bytes(&used_bytes, &capacity_bytes,
4638                   &prev_live_bytes, &next_live_bytes);
4639     end = bottom + HeapRegion::GrainWords;
4640   } else if (r->is_continues_humongous()) {
4641     get_hum_bytes(&used_bytes, &capacity_bytes,
4642                   &prev_live_bytes, &next_live_bytes);
4643     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4644   }
4645 
4646   _total_used_bytes      += used_bytes;
4647   _total_capacity_bytes  += capacity_bytes;
4648   _total_prev_live_bytes += prev_live_bytes;
4649   _total_next_live_bytes += next_live_bytes;
4650   _total_remset_bytes    += remset_bytes;
4651   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4652 
4653   // Print a line for this particular region.
4654   _out->print_cr(G1PPRL_LINE_PREFIX
4655                  G1PPRL_TYPE_FORMAT
4656                  G1PPRL_ADDR_BASE_FORMAT
4657                  G1PPRL_BYTE_FORMAT
4658                  G1PPRL_BYTE_FORMAT
4659                  G1PPRL_BYTE_FORMAT
4660                  G1PPRL_DOUBLE_FORMAT
4661                  G1PPRL_BYTE_FORMAT
4662                  G1PPRL_BYTE_FORMAT,
4663                  type, p2i(bottom), p2i(end),
4664                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4665                  remset_bytes, strong_code_roots_bytes);
4666 
4667   return false;
4668 }
4669 
4670 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4671   // Add the static memory usage to the remembered set sizes.
4672   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4673   // Print the footer of the output.
4674   _out->print_cr(G1PPRL_LINE_PREFIX);
4675   _out->print_cr(G1PPRL_LINE_PREFIX
4676                  " SUMMARY"
4677                  G1PPRL_SUM_MB_FORMAT("capacity")
4678                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4679                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4680                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4681                  G1PPRL_SUM_MB_FORMAT("remset")
4682                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4683                  bytes_to_mb(_total_capacity_bytes),
4684                  bytes_to_mb(_total_used_bytes),
4685                  perc(_total_used_bytes, _total_capacity_bytes),
4686                  bytes_to_mb(_total_prev_live_bytes),
4687                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4688                  bytes_to_mb(_total_next_live_bytes),
4689                  perc(_total_next_live_bytes, _total_capacity_bytes),
4690                  bytes_to_mb(_total_remset_bytes),
4691                  bytes_to_mb(_total_strong_code_roots_bytes));
4692   _out->cr();
4693 }