1 /*
   2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "code/codeCache.hpp"
  28 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  29 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  32 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  33 #include "gc_implementation/g1/g1Log.hpp"
  34 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  35 #include "gc_implementation/g1/g1RemSet.hpp"
  36 #include "gc_implementation/g1/heapRegion.inline.hpp"
  37 #include "gc_implementation/g1/heapRegionManager.inline.hpp"
  38 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  39 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
  40 #include "gc_implementation/shared/vmGCOperations.hpp"
  41 #include "gc_implementation/shared/gcTimer.hpp"
  42 #include "gc_implementation/shared/gcTrace.hpp"
  43 #include "gc_implementation/shared/gcTraceTime.hpp"
  44 #include "memory/allocation.hpp"
  45 #include "memory/genOopClosures.inline.hpp"
  46 #include "memory/referencePolicy.hpp"
  47 #include "memory/resourceArea.hpp"
  48 #include "oops/oop.inline.hpp"
  49 #include "runtime/handles.inline.hpp"
  50 #include "runtime/java.hpp"
  51 #include "runtime/atomic.inline.hpp"
  52 #include "runtime/prefetch.inline.hpp"
  53 #include "services/memTracker.hpp"
  54 
  55 // Concurrent marking bit map wrapper
  56 
  57 CMBitMapRO::CMBitMapRO(int shifter) :
  58   _bm(),
  59   _shifter(shifter) {
  60   _bmStartWord = 0;
  61   _bmWordSize = 0;
  62 }
  63 
  64 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  65                                                const HeapWord* limit) const {
  66   // First we must round addr *up* to a possible object boundary.
  67   addr = (HeapWord*)align_size_up((intptr_t)addr,
  68                                   HeapWordSize << _shifter);
  69   size_t addrOffset = heapWordToOffset(addr);
  70   if (limit == NULL) {
  71     limit = _bmStartWord + _bmWordSize;
  72   }
  73   size_t limitOffset = heapWordToOffset(limit);
  74   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  75   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  76   assert(nextAddr >= addr, "get_next_one postcondition");
  77   assert(nextAddr == limit || isMarked(nextAddr),
  78          "get_next_one postcondition");
  79   return nextAddr;
  80 }
  81 
  82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  83                                                  const HeapWord* limit) const {
  84   size_t addrOffset = heapWordToOffset(addr);
  85   if (limit == NULL) {
  86     limit = _bmStartWord + _bmWordSize;
  87   }
  88   size_t limitOffset = heapWordToOffset(limit);
  89   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  90   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
   91   assert(nextAddr >= addr, "get_next_zero postcondition");
   92   assert(nextAddr == limit || !isMarked(nextAddr),
   93          "get_next_zero postcondition");
  94   return nextAddr;
  95 }
  96 
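      // Converts a difference expressed in heap words into the corresponding
      // difference in bitmap offsets by undoing the shifter scaling.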
  97 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  98   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  99   return (int) (diff >> _shifter);
 100 }
 101 
 102 #ifndef PRODUCT
 103 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 104   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 105   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 106          "size inconsistency");
 107   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 108          _bmWordSize  == heap_rs.word_size();
 109 }
 110 #endif
 111 
 112 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 113   _bm.print_on_error(st, prefix);
 114 }
 115 
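      // One bit in the mark bitmap covers MinObjAlignmentInBytes bytes of heap,
      // so one bitmap byte covers mark_distance() heap bytes and the bitmap as a
      // whole needs heap_size / mark_distance() bytes of backing storage.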
 116 size_t CMBitMap::compute_size(size_t heap_size) {
 117   return heap_size / mark_distance();
 118 }
 119 
 120 size_t CMBitMap::mark_distance() {
 121   return MinObjAlignmentInBytes * BitsPerByte;
 122 }
 123 
 124 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 125   _bmStartWord = heap.start();
 126   _bmWordSize = heap.word_size();
 127 
 128   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 129   _bm.set_size(_bmWordSize >> _shifter);
 130 
 131   storage->set_mapping_changed_listener(&_listener);
 132 }
 133 
 134 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 135   if (zero_filled) {
 136     return;
 137   }
 138   // We need to clear the bitmap on commit, removing any existing information.
 139   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 140   _bm->clearRange(mr);
 141 }
 142 
 143 // Closure used for clearing the given mark bitmap.
 144 class ClearBitmapHRClosure : public HeapRegionClosure {
 145  private:
 146   ConcurrentMark* _cm;
 147   CMBitMap* _bitmap;
  148   bool _may_yield;      // The closure may yield during iteration. If it yields and marking has been aborted, the iteration is aborted.
 149  public:
 150   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 151     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 152   }
 153 
 154   virtual bool doHeapRegion(HeapRegion* r) {
 155     size_t const chunk_size_in_words = M / HeapWordSize;
 156 
 157     HeapWord* cur = r->bottom();
 158     HeapWord* const end = r->end();
 159 
 160     while (cur < end) {
 161       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 162       _bitmap->clearRange(mr);
 163 
 164       cur += chunk_size_in_words;
 165 
 166       // Abort iteration if after yielding the marking has been aborted.
 167       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 168         return true;
 169       }
 170       // Repeat the asserts from before the start of the closure. We will do them
 171       // as asserts here to minimize their overhead on the product. However, we
 172       // will have them as guarantees at the beginning / end of the bitmap
 173       // clearing to get some checking in the product.
 174       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 175       assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
 176     }
 177 
 178     return false;
 179   }
 180 };
 181 
 182 void CMBitMap::clearAll() {
 183   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 184   G1CollectedHeap::heap()->heap_region_iterate(&cl);
 185   guarantee(cl.complete(), "Must have completed iteration.");
 186   return;
 187 }
 188 
 189 void CMBitMap::markRange(MemRegion mr) {
  190   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 191   assert(!mr.is_empty(), "unexpected empty region");
 192   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 193           ((HeapWord *) mr.end())),
 194          "markRange memory region end is not card aligned");
 195   // convert address range into offset range
 196   _bm.at_put_range(heapWordToOffset(mr.start()),
 197                    heapWordToOffset(mr.end()), true);
 198 }
 199 
 200 void CMBitMap::clearRange(MemRegion mr) {
  201   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 202   assert(!mr.is_empty(), "unexpected empty region");
 203   // convert address range into offset range
 204   _bm.at_put_range(heapWordToOffset(mr.start()),
 205                    heapWordToOffset(mr.end()), false);
 206 }
 207 
 208 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 209                                             HeapWord* end_addr) {
 210   HeapWord* start = getNextMarkedWordAddress(addr);
 211   start = MIN2(start, end_addr);
 212   HeapWord* end   = getNextUnmarkedWordAddress(start);
 213   end = MIN2(end, end_addr);
 214   assert(start <= end, "Consistency check");
 215   MemRegion mr(start, end);
 216   if (!mr.is_empty()) {
 217     clearRange(mr);
 218   }
 219   return mr;
 220 }
 221 
 222 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 223   _base(NULL), _cm(cm)
 224 #ifdef ASSERT
 225   , _drain_in_progress(false)
 226   , _drain_in_progress_yields(false)
 227 #endif
 228 {}
 229 
 230 bool CMMarkStack::allocate(size_t capacity) {
 231   // allocate a stack of the requisite depth
 232   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 233   if (!rs.is_reserved()) {
 234     warning("ConcurrentMark MarkStack allocation failure");
 235     return false;
 236   }
 237   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 238   if (!_virtual_space.initialize(rs, rs.size())) {
 239     warning("ConcurrentMark MarkStack backing store failure");
 240     // Release the virtual memory reserved for the marking stack
 241     rs.release();
 242     return false;
 243   }
 244   assert(_virtual_space.committed_size() == rs.size(),
 245          "Didn't reserve backing store for all of ConcurrentMark stack?");
 246   _base = (oop*) _virtual_space.low();
 247   setEmpty();
 248   _capacity = (jint) capacity;
 249   _saved_index = -1;
 250   _should_expand = false;
 251   NOT_PRODUCT(_max_depth = 0);
 252   return true;
 253 }
 254 
 255 void CMMarkStack::expand() {
  256   // Called during remark if we've overflowed the marking stack during marking.
  257   assert(isEmpty(), "stack should have been emptied while handling overflow");
 258   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 259   // Clear expansion flag
 260   _should_expand = false;
 261   if (_capacity == (jint) MarkStackSizeMax) {
 262     if (PrintGCDetails && Verbose) {
 263       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 264     }
 265     return;
 266   }
 267   // Double capacity if possible
 268   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 269   // Do not give up existing stack until we have managed to
 270   // get the double capacity that we desired.
 271   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 272                                                            sizeof(oop)));
 273   if (rs.is_reserved()) {
 274     // Release the backing store associated with old stack
 275     _virtual_space.release();
 276     // Reinitialize virtual space for new stack
 277     if (!_virtual_space.initialize(rs, rs.size())) {
 278       fatal("Not enough swap for expanded marking stack capacity");
 279     }
 280     _base = (oop*)(_virtual_space.low());
 281     _index = 0;
 282     _capacity = new_capacity;
 283   } else {
 284     if (PrintGCDetails && Verbose) {
  285       // Failed to double capacity; continue with the existing stack.
 286       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 287                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 288                           _capacity / K, new_capacity / K);
 289     }
 290   }
 291 }
 292 
 293 void CMMarkStack::set_should_expand() {
  294   // If we're resetting the marking state because of a
 295   // marking stack overflow, record that we should, if
 296   // possible, expand the stack.
 297   _should_expand = _cm->has_overflown();
 298 }
 299 
 300 CMMarkStack::~CMMarkStack() {
 301   if (_base != NULL) {
 302     _base = NULL;
 303     _virtual_space.release();
 304   }
 305 }
 306 
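      // Lock-free push of a single entry: claim the next slot by advancing _index
      // with a CAS, retrying on contention; sets _overflow if the stack is full.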
 307 void CMMarkStack::par_push(oop ptr) {
 308   while (true) {
 309     if (isFull()) {
 310       _overflow = true;
 311       return;
 312     }
 313     // Otherwise...
 314     jint index = _index;
 315     jint next_index = index+1;
 316     jint res = Atomic::cmpxchg(next_index, &_index, index);
 317     if (res == index) {
 318       _base[index] = ptr;
 319       // Note that we don't maintain this atomically.  We could, but it
 320       // doesn't seem necessary.
 321       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 322       return;
 323     }
 324     // Otherwise, we need to try again.
 325   }
 326 }
 327 
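      // Lock-free bulk push of n entries: claim the slots with a single CAS on
      // _index, retrying on contention; sets _overflow if the entries do not fit.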
 328 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 329   while (true) {
 330     if (isFull()) {
 331       _overflow = true;
 332       return;
 333     }
 334     // Otherwise...
 335     jint index = _index;
 336     jint next_index = index + n;
 337     if (next_index > _capacity) {
 338       _overflow = true;
 339       return;
 340     }
 341     jint res = Atomic::cmpxchg(next_index, &_index, index);
 342     if (res == index) {
 343       for (int i = 0; i < n; i++) {
 344         int  ind = index + i;
 345         assert(ind < _capacity, "By overflow test above.");
 346         _base[ind] = ptr_arr[i];
 347       }
 348       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 349       return;
 350     }
 351     // Otherwise, we need to try again.
 352   }
 353 }
 354 
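      // Bulk push of n entries performed while holding ParGCRareEvent_lock instead
      // of using a CAS; sets _overflow if the entries do not fit.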
 355 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 356   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 357   jint start = _index;
 358   jint next_index = start + n;
 359   if (next_index > _capacity) {
 360     _overflow = true;
 361     return;
 362   }
 363   // Otherwise.
 364   _index = next_index;
 365   for (int i = 0; i < n; i++) {
 366     int ind = start + i;
 367     assert(ind < _capacity, "By overflow test above.");
 368     _base[ind] = ptr_arr[i];
 369   }
 370   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 371 }
 372 
 373 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 374   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 375   jint index = _index;
 376   if (index == 0) {
 377     *n = 0;
 378     return false;
 379   } else {
 380     int k = MIN2(max, index);
 381     jint  new_ind = index - k;
 382     for (int j = 0; j < k; j++) {
 383       ptr_arr[j] = _base[new_ind + j];
 384     }
 385     _index = new_ind;
 386     *n = k;
 387     return true;
 388   }
 389 }
 390 
 391 template<class OopClosureClass>
 392 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 393   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 394          || SafepointSynchronize::is_at_safepoint(),
 395          "Drain recursion must be yield-safe.");
 396   bool res = true;
 397   debug_only(_drain_in_progress = true);
 398   debug_only(_drain_in_progress_yields = yield_after);
 399   while (!isEmpty()) {
 400     oop newOop = pop();
 401     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 402     assert(newOop->is_oop(), "Expected an oop");
 403     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 404            "only grey objects on this stack");
 405     newOop->oop_iterate(cl);
 406     if (yield_after && _cm->do_yield_check()) {
 407       res = false;
 408       break;
 409     }
 410   }
 411   debug_only(_drain_in_progress = false);
 412   return res;
 413 }
 414 
 415 void CMMarkStack::note_start_of_gc() {
 416   assert(_saved_index == -1,
 417          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 418   _saved_index = _index;
 419 }
 420 
 421 void CMMarkStack::note_end_of_gc() {
 422   // This is intentionally a guarantee, instead of an assert. If we
 423   // accidentally add something to the mark stack during GC, it
  424   // will be a correctness issue so it's better if we crash. We'll
 425   // only check this once per GC anyway, so it won't be a performance
 426   // issue in any way.
 427   guarantee(_saved_index == _index,
 428             err_msg("saved index: %d index: %d", _saved_index, _index));
 429   _saved_index = -1;
 430 }
 431 
 432 void CMMarkStack::oops_do(OopClosure* f) {
 433   assert(_saved_index == _index,
 434          err_msg("saved index: %d index: %d", _saved_index, _index));
 435   for (int i = 0; i < _index; i += 1) {
 436     f->do_oop(&_base[i]);
 437   }
 438 }
 439 
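      // Root regions are currently the survivor regions of the initial-mark pause.
      // They are claimed and scanned concurrently after the pause; callers can use
      // wait_until_scan_finished() to block until the scan is complete.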
 440 CMRootRegions::CMRootRegions() :
 441   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 442   _should_abort(false),  _next_survivor(NULL) { }
 443 
 444 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 445   _young_list = g1h->young_list();
 446   _cm = cm;
 447 }
 448 
 449 void CMRootRegions::prepare_for_scan() {
 450   assert(!scan_in_progress(), "pre-condition");
 451 
 452   // Currently, only survivors can be root regions.
 453   assert(_next_survivor == NULL, "pre-condition");
 454   _next_survivor = _young_list->first_survivor_region();
 455   _scan_in_progress = (_next_survivor != NULL);
 456   _should_abort = false;
 457 }
 458 
 459 HeapRegion* CMRootRegions::claim_next() {
 460   if (_should_abort) {
 461     // If someone has set the should_abort flag, we return NULL to
 462     // force the caller to bail out of their loop.
 463     return NULL;
 464   }
 465 
 466   // Currently, only survivors can be root regions.
 467   HeapRegion* res = _next_survivor;
 468   if (res != NULL) {
 469     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 470     // Read it again in case it changed while we were waiting for the lock.
 471     res = _next_survivor;
 472     if (res != NULL) {
 473       if (res == _young_list->last_survivor_region()) {
 474         // We just claimed the last survivor so store NULL to indicate
 475         // that we're done.
 476         _next_survivor = NULL;
 477       } else {
 478         _next_survivor = res->get_next_young_region();
 479       }
 480     } else {
 481       // Someone else claimed the last survivor while we were trying
 482       // to take the lock so nothing else to do.
 483     }
 484   }
 485   assert(res == NULL || res->is_survivor(), "post-condition");
 486 
 487   return res;
 488 }
 489 
 490 void CMRootRegions::scan_finished() {
 491   assert(scan_in_progress(), "pre-condition");
 492 
 493   // Currently, only survivors can be root regions.
 494   if (!_should_abort) {
 495     assert(_next_survivor == NULL, "we should have claimed all survivors");
 496   }
 497   _next_survivor = NULL;
 498 
 499   {
 500     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 501     _scan_in_progress = false;
 502     RootRegionScan_lock->notify_all();
 503   }
 504 }
 505 
 506 bool CMRootRegions::wait_until_scan_finished() {
 507   if (!scan_in_progress()) return false;
 508 
 509   {
 510     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 511     while (scan_in_progress()) {
 512       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 513     }
 514   }
 515   return true;
 516 }
 517 
 518 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 519 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 520 #endif // _MSC_VER
 521 
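      // Returns roughly one quarter of the given number of parallel GC threads
      // (rounded to the nearest integer), and always at least one.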
 522 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 523   return MAX2((n_par_threads + 2) / 4, 1U);
 524 }
 525 
 526 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 527   _g1h(g1h),
 528   _markBitMap1(),
 529   _markBitMap2(),
 530   _parallel_marking_threads(0),
 531   _max_parallel_marking_threads(0),
 532   _sleep_factor(0.0),
 533   _marking_task_overhead(1.0),
 534   _cleanup_sleep_factor(0.0),
 535   _cleanup_task_overhead(1.0),
 536   _cleanup_list("Cleanup List"),
 537   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 538   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 539             CardTableModRefBS::card_shift,
 540             false /* in_resource_area*/),
 541 
 542   _prevMarkBitMap(&_markBitMap1),
 543   _nextMarkBitMap(&_markBitMap2),
 544 
 545   _markStack(this),
 546   // _finger set in set_non_marking_state
 547 
 548   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
 549   // _active_tasks set in set_non_marking_state
 550   // _tasks set inside the constructor
 551   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 552   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 553 
 554   _has_overflown(false),
 555   _concurrent(false),
 556   _has_aborted(false),
 557   _aborted_gc_id(GCId::undefined()),
 558   _restart_for_overflow(false),
 559   _concurrent_marking_in_progress(false),
 560 
 561   // _verbose_level set below
 562 
 563   _init_times(),
 564   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 565   _cleanup_times(),
 566   _total_counting_time(0.0),
 567   _total_rs_scrub_time(0.0),
 568 
 569   _parallel_workers(NULL),
 570 
 571   _count_card_bitmaps(NULL),
 572   _count_marked_bytes(NULL),
 573   _completed_initialization(false) {
 574   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 575   if (verbose_level < no_verbose) {
 576     verbose_level = no_verbose;
 577   }
 578   if (verbose_level > high_verbose) {
 579     verbose_level = high_verbose;
 580   }
 581   _verbose_level = verbose_level;
 582 
 583   if (verbose_low()) {
 584     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 585                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 586   }
 587 
 588   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 589   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 590 
 591   // Create & start a ConcurrentMark thread.
 592   _cmThread = new ConcurrentMarkThread(this);
 593   assert(cmThread() != NULL, "CM Thread should have been created");
 594   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 595   if (_cmThread->osthread() == NULL) {
 596       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 597   }
 598 
 599   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 600   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 601   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 602 
 603   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 604   satb_qs.set_buffer_size(G1SATBBufferSize);
 605 
 606   _root_regions.init(_g1h, this);
 607 
 608   if (ConcGCThreads > ParallelGCThreads) {
 609     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 610             "than ParallelGCThreads (" UINTX_FORMAT ").",
 611             ConcGCThreads, ParallelGCThreads);
 612     return;
 613   }
 614   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 615     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 616     // if both are set
 617     _sleep_factor             = 0.0;
 618     _marking_task_overhead    = 1.0;
 619   } else if (G1MarkingOverheadPercent > 0) {
 620     // We will calculate the number of parallel marking threads based
 621     // on a target overhead with respect to the soft real-time goal
 622     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 623     double overall_cm_overhead =
 624       (double) MaxGCPauseMillis * marking_overhead /
 625       (double) GCPauseIntervalMillis;
 626     double cpu_ratio = 1.0 / (double) os::processor_count();
 627     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 628     double marking_task_overhead =
 629       overall_cm_overhead / marking_thread_num *
 630                                               (double) os::processor_count();
 631     double sleep_factor =
 632                        (1.0 - marking_task_overhead) / marking_task_overhead;
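          // Worked example (hypothetical values): with G1MarkingOverheadPercent=10,
          // MaxGCPauseMillis=200, GCPauseIntervalMillis=1000 and 8 processors:
          //   overall_cm_overhead   = 200 * 0.10 / 1000   = 0.02
          //   cpu_ratio             = 1 / 8               = 0.125
          //   marking_thread_num    = ceil(0.02 / 0.125)  = 1
          //   marking_task_overhead = 0.02 / 1 * 8        = 0.16
          //   sleep_factor          = (1 - 0.16) / 0.16   = 5.25
          // i.e. the single marking thread sleeps 5.25x as long as it works, keeping
          // concurrent marking at roughly 2% of total CPU time.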
 633 
 634     FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 635     _sleep_factor             = sleep_factor;
 636     _marking_task_overhead    = marking_task_overhead;
 637   } else {
 638     // Calculate the number of parallel marking threads by scaling
 639     // the number of parallel GC threads.
 640     uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 641     FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 642     _sleep_factor             = 0.0;
 643     _marking_task_overhead    = 1.0;
 644   }
 645 
 646   assert(ConcGCThreads > 0, "Should have been set");
 647   _parallel_marking_threads = (uint) ConcGCThreads;
 648   _max_parallel_marking_threads = _parallel_marking_threads;
 649 
 650   if (parallel_marking_threads() > 1) {
 651     _cleanup_task_overhead = 1.0;
 652   } else {
 653     _cleanup_task_overhead = marking_task_overhead();
 654   }
 655   _cleanup_sleep_factor =
 656                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 657 
 658 #if 0
 659   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 660   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 661   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 662   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 663   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 664 #endif
 665 
 666   _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 667        _max_parallel_marking_threads, false, true);
 668   if (_parallel_workers == NULL) {
 669     vm_exit_during_initialization("Failed necessary allocation.");
 670   } else {
 671     _parallel_workers->initialize_workers();
 672   }
 673 
 674   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 675     uintx mark_stack_size =
 676       MIN2(MarkStackSizeMax,
 677           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 678     // Verify that the calculated value for MarkStackSize is in range.
 679     // It would be nice to use the private utility routine from Arguments.
 680     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 681       warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
 682               "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 683               mark_stack_size, (uintx) 1, MarkStackSizeMax);
 684       return;
 685     }
 686     FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
 687   } else {
 688     // Verify MarkStackSize is in range.
 689     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 690       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 691         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 692           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
 693                   "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 694                   MarkStackSize, (uintx) 1, MarkStackSizeMax);
 695           return;
 696         }
 697       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 698         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 699           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
 700                   " or for MarkStackSizeMax (" UINTX_FORMAT ")",
 701                   MarkStackSize, MarkStackSizeMax);
 702           return;
 703         }
 704       }
 705     }
 706   }
 707 
 708   if (!_markStack.allocate(MarkStackSize)) {
 709     warning("Failed to allocate CM marking stack");
 710     return;
 711   }
 712 
 713   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 714   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 715 
 716   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 717   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 718 
 719   BitMap::idx_t card_bm_size = _card_bm.size();
 720 
 721   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 722   _active_tasks = _max_worker_id;
 723 
 724   size_t max_regions = (size_t) _g1h->max_regions();
 725   for (uint i = 0; i < _max_worker_id; ++i) {
 726     CMTaskQueue* task_queue = new CMTaskQueue();
 727     task_queue->initialize();
 728     _task_queues->register_queue(i, task_queue);
 729 
 730     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 731     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 732 
 733     _tasks[i] = new CMTask(i, this,
 734                            _count_marked_bytes[i],
 735                            &_count_card_bitmaps[i],
 736                            task_queue, _task_queues);
 737 
 738     _accum_task_vtime[i] = 0.0;
 739   }
 740 
 741   // Calculate the card number for the bottom of the heap. Used
 742   // in biasing indexes into the accounting card bitmaps.
 743   _heap_bottom_card_num =
 744     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 745                                 CardTableModRefBS::card_shift);
 746 
 747   // Clear all the liveness counting data
 748   clear_all_count_data();
 749 
 750   // so that the call below can read a sensible value
 751   _heap_start = g1h->reserved_region().start();
 752   set_non_marking_state();
 753   _completed_initialization = true;
 754 }
 755 
 756 void ConcurrentMark::reset() {
 757   // Starting values for these two. This should be called in a STW
 758   // phase.
 759   MemRegion reserved = _g1h->g1_reserved();
 760   _heap_start = reserved.start();
 761   _heap_end   = reserved.end();
 762 
 763   // Separated the asserts so that we know which one fires.
 764   assert(_heap_start != NULL, "heap bounds should look ok");
 765   assert(_heap_end != NULL, "heap bounds should look ok");
 766   assert(_heap_start < _heap_end, "heap bounds should look ok");
 767 
 768   // Reset all the marking data structures and any necessary flags
 769   reset_marking_state();
 770 
 771   if (verbose_low()) {
 772     gclog_or_tty->print_cr("[global] resetting");
 773   }
 774 
 775   // We do reset all of them, since different phases will use
  776   // different numbers of active threads. So, it's easiest to have all
 777   // of them ready.
 778   for (uint i = 0; i < _max_worker_id; ++i) {
 779     _tasks[i]->reset(_nextMarkBitMap);
 780   }
 781 
 782   // we need this to make sure that the flag is on during the evac
 783   // pause with initial mark piggy-backed
 784   set_concurrent_marking_in_progress();
 785 }
 786 
 787 
 788 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 789   _markStack.set_should_expand();
 790   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 791   if (clear_overflow) {
 792     clear_has_overflown();
 793   } else {
 794     assert(has_overflown(), "pre-condition");
 795   }
 796   _finger = _heap_start;
 797 
 798   for (uint i = 0; i < _max_worker_id; ++i) {
 799     CMTaskQueue* queue = _task_queues->queue(i);
 800     queue->set_empty();
 801   }
 802 }
 803 
 804 void ConcurrentMark::set_concurrency(uint active_tasks) {
 805   assert(active_tasks <= _max_worker_id, "we should not have more");
 806 
 807   _active_tasks = active_tasks;
 808   // Need to update the three data structures below according to the
 809   // number of active threads for this phase.
 810   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 811   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 812   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 813 }
 814 
 815 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 816   set_concurrency(active_tasks);
 817 
 818   _concurrent = concurrent;
 819   // We propagate this to all tasks, not just the active ones.
 820   for (uint i = 0; i < _max_worker_id; ++i)
 821     _tasks[i]->set_concurrent(concurrent);
 822 
 823   if (concurrent) {
 824     set_concurrent_marking_in_progress();
 825   } else {
 826     // We currently assume that the concurrent flag has been set to
 827     // false before we start remark. At this point we should also be
 828     // in a STW phase.
 829     assert(!concurrent_marking_in_progress(), "invariant");
 830     assert(out_of_regions(),
 831            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
 832                    p2i(_finger), p2i(_heap_end)));
 833   }
 834 }
 835 
 836 void ConcurrentMark::set_non_marking_state() {
 837   // We set the global marking state to some default values when we're
 838   // not doing marking.
 839   reset_marking_state();
 840   _active_tasks = 0;
 841   clear_concurrent_marking_in_progress();
 842 }
 843 
 844 ConcurrentMark::~ConcurrentMark() {
 845   // The ConcurrentMark instance is never freed.
 846   ShouldNotReachHere();
 847 }
 848 
 849 void ConcurrentMark::clearNextBitmap() {
 850   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 851 
 852   // Make sure that the concurrent mark thread looks to still be in
 853   // the current cycle.
 854   guarantee(cmThread()->during_cycle(), "invariant");
 855 
 856   // We are finishing up the current cycle by clearing the next
 857   // marking bitmap and getting it ready for the next cycle. During
 858   // this time no other cycle can start. So, let's make sure that this
 859   // is the case.
 860   guarantee(!g1h->mark_in_progress(), "invariant");
 861 
 862   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 863   g1h->heap_region_iterate(&cl);
 864 
 865   // Clear the liveness counting data. If the marking has been aborted, the abort()
 866   // call already did that.
 867   if (cl.complete()) {
 868     clear_all_count_data();
 869   }
 870 
 871   // Repeat the asserts from above.
 872   guarantee(cmThread()->during_cycle(), "invariant");
 873   guarantee(!g1h->mark_in_progress(), "invariant");
 874 }
 875 
 876 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 877   CMBitMap* _bitmap;
 878   bool _error;
 879  public:
 880   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 881   }
 882 
 883   virtual bool doHeapRegion(HeapRegion* r) {
  884     // This closure can be called concurrently with the mutator, so we must make sure
  885     // that the result of the getNextMarkedWordAddress() call is compared to the
  886     // value passed to it as the limit to detect any marked bits.
  887     // We can use the region's orig_end() for both the limit and the comparison value,
  888     // as it always contains the "real" end of the region, never changes, and
  889     // has no side effects.
 890     // Due to the latter, there can also be no problem with the compiler generating
 891     // reloads of the orig_end() call.
 892     HeapWord* end = r->orig_end();
 893     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 894   }
 895 };
 896 
 897 bool ConcurrentMark::nextMarkBitmapIsClear() {
 898   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 899   _g1h->heap_region_iterate(&cl);
 900   return cl.complete();
 901 }
 902 
 903 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 904 public:
 905   bool doHeapRegion(HeapRegion* r) {
 906     if (!r->is_continues_humongous()) {
 907       r->note_start_of_marking();
 908     }
 909     return false;
 910   }
 911 };
 912 
 913 void ConcurrentMark::checkpointRootsInitialPre() {
 914   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 915   G1CollectorPolicy* g1p = g1h->g1_policy();
 916 
 917   _has_aborted = false;
 918 
 919 #ifndef PRODUCT
 920   if (G1PrintReachableAtInitialMark) {
 921     print_reachable("at-cycle-start",
 922                     VerifyOption_G1UsePrevMarking, true /* all */);
 923   }
 924 #endif
 925 
 926   // Initialize marking structures. This has to be done in a STW phase.
 927   reset();
 928 
 929   // For each region note start of marking.
 930   NoteStartOfMarkHRClosure startcl;
 931   g1h->heap_region_iterate(&startcl);
 932 }
 933 
 934 
 935 void ConcurrentMark::checkpointRootsInitialPost() {
 936   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 937 
 938   // If we force an overflow during remark, the remark operation will
 939   // actually abort and we'll restart concurrent marking. If we always
 940   // force an overflow during remark we'll never actually complete the
 941   // marking phase. So, we initialize this here, at the start of the
  942   // cycle, so that the remaining overflow number will decrease at
 943   // every remark and we'll eventually not need to cause one.
 944   force_overflow_stw()->init();
 945 
 946   // Start Concurrent Marking weak-reference discovery.
 947   ReferenceProcessor* rp = g1h->ref_processor_cm();
 948   // enable ("weak") refs discovery
 949   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 950   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 951 
 952   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  953   // This is the start of the marking cycle; we expect all
  954   // threads to have SATB queues with active set to false.
 955   satb_mq_set.set_active_all_threads(true, /* new active value */
 956                                      false /* expected_active */);
 957 
 958   _root_regions.prepare_for_scan();
 959 
 960   // update_g1_committed() will be called at the end of an evac pause
 961   // when marking is on. So, it's also called at the end of the
 962   // initial-mark pause to update the heap end, if the heap expands
 963   // during it. No need to call it here.
 964 }
 965 
 966 /*
 967  * Notice that in the next two methods, we actually leave the STS
 968  * during the barrier sync and join it immediately afterwards. If we
 969  * do not do this, the following deadlock can occur: one thread could
 970  * be in the barrier sync code, waiting for the other thread to also
 971  * sync up, whereas another one could be trying to yield, while also
 972  * waiting for the other threads to sync up too.
 973  *
 974  * Note, however, that this code is also used during remark and in
 975  * this case we should not attempt to leave / enter the STS, otherwise
 976  * we'll either hit an assert (debug / fastdebug) or deadlock
 977  * (product). So we should only leave / enter the STS if we are
 978  * operating concurrently.
 979  *
  980  * Because the thread that does the sync barrier has left the STS, it
  981  * is possible for a Full GC or an evacuation pause to occur while it
  982  * is suspended. This is actually safe, since entering the sync
  983  * barrier is one of the last things do_marking_step() does, and it
  984  * doesn't manipulate any data structures afterwards.
 985  */
 986 
 987 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 988   if (verbose_low()) {
 989     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 990   }
 991 
 992   if (concurrent()) {
 993     SuspendibleThreadSet::leave();
 994   }
 995 
 996   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
 997 
 998   if (concurrent()) {
 999     SuspendibleThreadSet::join();
1000   }
1001   // at this point everyone should have synced up and not be doing any
1002   // more work
1003 
1004   if (verbose_low()) {
1005     if (barrier_aborted) {
1006       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
1007     } else {
1008       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
1009     }
1010   }
1011 
1012   if (barrier_aborted) {
1013     // If the barrier aborted we ignore the overflow condition and
1014     // just abort the whole marking phase as quickly as possible.
1015     return;
1016   }
1017 
1018   // If we're executing the concurrent phase of marking, reset the marking
1019   // state; otherwise the marking state is reset after reference processing,
1020   // during the remark pause.
1021   // If we reset here as a result of an overflow during the remark we will
1022   // see assertion failures from any subsequent set_concurrency_and_phase()
1023   // calls.
1024   if (concurrent()) {
 1025     // let the task associated with worker 0 do this
1026     if (worker_id == 0) {
1027       // task 0 is responsible for clearing the global data structures
1028       // We should be here because of an overflow. During STW we should
1029       // not clear the overflow flag since we rely on it being true when
1030       // we exit this method to abort the pause and restart concurrent
1031       // marking.
1032       reset_marking_state(true /* clear_overflow */);
1033       force_overflow()->update();
1034 
1035       if (G1Log::fine()) {
1036         gclog_or_tty->gclog_stamp(concurrent_gc_id());
1037         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1038       }
1039     }
1040   }
1041 
 1042   // after this, each task should reset its own data structures and
 1043   // then go into the second barrier
1044 }
1045 
1046 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1047   if (verbose_low()) {
1048     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1049   }
1050 
1051   if (concurrent()) {
1052     SuspendibleThreadSet::leave();
1053   }
1054 
1055   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1056 
1057   if (concurrent()) {
1058     SuspendibleThreadSet::join();
1059   }
1060   // at this point everything should be re-initialized and ready to go
1061 
1062   if (verbose_low()) {
1063     if (barrier_aborted) {
1064       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1065     } else {
1066       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1067     }
1068   }
1069 }
1070 
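      // Debug-only (non-PRODUCT) support for artificially forcing a marking stack
      // overflow the first G1ConcMarkForceOverflow times, used to exercise the
      // overflow-and-restart machinery.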
1071 #ifndef PRODUCT
1072 void ForceOverflowSettings::init() {
1073   _num_remaining = G1ConcMarkForceOverflow;
1074   _force = false;
1075   update();
1076 }
1077 
1078 void ForceOverflowSettings::update() {
1079   if (_num_remaining > 0) {
1080     _num_remaining -= 1;
1081     _force = true;
1082   } else {
1083     _force = false;
1084   }
1085 }
1086 
1087 bool ForceOverflowSettings::should_force() {
1088   if (_force) {
1089     _force = false;
1090     return true;
1091   } else {
1092     return false;
1093   }
1094 }
1095 #endif // !PRODUCT
1096 
1097 class CMConcurrentMarkingTask: public AbstractGangTask {
1098 private:
1099   ConcurrentMark*       _cm;
1100   ConcurrentMarkThread* _cmt;
1101 
1102 public:
1103   void work(uint worker_id) {
1104     assert(Thread::current()->is_ConcurrentGC_thread(),
1105            "this should only be done by a conc GC thread");
1106     ResourceMark rm;
1107 
1108     double start_vtime = os::elapsedVTime();
1109 
1110     SuspendibleThreadSet::join();
1111 
1112     assert(worker_id < _cm->active_tasks(), "invariant");
1113     CMTask* the_task = _cm->task(worker_id);
1114     the_task->record_start_time();
1115     if (!_cm->has_aborted()) {
1116       do {
1117         double start_vtime_sec = os::elapsedVTime();
1118         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1119 
1120         the_task->do_marking_step(mark_step_duration_ms,
1121                                   true  /* do_termination */,
1122                                   false /* is_serial*/);
1123 
1124         double end_vtime_sec = os::elapsedVTime();
1125         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1126         _cm->clear_has_overflown();
1127 
1128         _cm->do_yield_check(worker_id);
1129 
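              // If this marking step ended early (the task aborted, e.g. because its
              // time budget expired) while marking as a whole has not aborted, throttle
              // the worker: sleep for the elapsed marking time scaled by the sleep
              // factor, leaving the suspendible thread set so that safepoints are not
              // held up while sleeping.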
1130         jlong sleep_time_ms;
1131         if (!_cm->has_aborted() && the_task->has_aborted()) {
1132           sleep_time_ms =
1133             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1134           SuspendibleThreadSet::leave();
1135           os::sleep(Thread::current(), sleep_time_ms, false);
1136           SuspendibleThreadSet::join();
1137         }
1138       } while (!_cm->has_aborted() && the_task->has_aborted());
1139     }
1140     the_task->record_end_time();
1141     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1142 
1143     SuspendibleThreadSet::leave();
1144 
1145     double end_vtime = os::elapsedVTime();
1146     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1147   }
1148 
1149   CMConcurrentMarkingTask(ConcurrentMark* cm,
1150                           ConcurrentMarkThread* cmt) :
1151       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1152 
1153   ~CMConcurrentMarkingTask() { }
1154 };
1155 
1156 // Calculates the number of active workers for a concurrent
1157 // phase.
1158 uint ConcurrentMark::calc_parallel_marking_threads() {
1159   uint n_conc_workers = 0;
1160   if (!UseDynamicNumberOfGCThreads ||
1161       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1162        !ForceDynamicNumberOfGCThreads)) {
1163     n_conc_workers = max_parallel_marking_threads();
1164   } else {
1165     n_conc_workers =
1166       AdaptiveSizePolicy::calc_default_active_workers(
1167                                    max_parallel_marking_threads(),
1168                                    1, /* Minimum workers */
1169                                    parallel_marking_threads(),
1170                                    Threads::number_of_non_daemon_threads());
1171     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1172     // that scaling has already gone into "_max_parallel_marking_threads".
1173   }
1174   assert(n_conc_workers > 0, "Always need at least 1");
1175   return n_conc_workers;
1176 }
1177 
1178 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1179   // Currently, only survivors can be root regions.
1180   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1181   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1182 
1183   const uintx interval = PrefetchScanIntervalInBytes;
1184   HeapWord* curr = hr->bottom();
1185   const HeapWord* end = hr->top();
1186   while (curr < end) {
1187     Prefetch::read(curr, interval);
1188     oop obj = oop(curr);
1189     int size = obj->oop_iterate(&cl);
1190     assert(size == obj->size(), "sanity");
1191     curr += size;
1192   }
1193 }
1194 
1195 class CMRootRegionScanTask : public AbstractGangTask {
1196 private:
1197   ConcurrentMark* _cm;
1198 
1199 public:
1200   CMRootRegionScanTask(ConcurrentMark* cm) :
1201     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1202 
1203   void work(uint worker_id) {
1204     assert(Thread::current()->is_ConcurrentGC_thread(),
1205            "this should only be done by a conc GC thread");
1206 
1207     CMRootRegions* root_regions = _cm->root_regions();
1208     HeapRegion* hr = root_regions->claim_next();
1209     while (hr != NULL) {
1210       _cm->scanRootRegion(hr, worker_id);
1211       hr = root_regions->claim_next();
1212     }
1213   }
1214 };
1215 
1216 void ConcurrentMark::scanRootRegions() {
1217   // Start of concurrent marking.
1218   ClassLoaderDataGraph::clear_claimed_marks();
1219 
1220   // scan_in_progress() will have been set to true only if there was
1221   // at least one root region to scan. So, if it's false, we
1222   // should not attempt to do any further work.
1223   if (root_regions()->scan_in_progress()) {
1224     _parallel_marking_threads = calc_parallel_marking_threads();
1225     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1226            "Maximum number of marking threads exceeded");
1227     uint active_workers = MAX2(1U, parallel_marking_threads());
1228 
1229     CMRootRegionScanTask task(this);
1230     _parallel_workers->set_active_workers(active_workers);
1231     _parallel_workers->run_task(&task);
1232 
1233     // It's possible that has_aborted() is true here without actually
1234     // aborting the survivor scan earlier. This is OK as it's
1235     // mainly used for sanity checking.
1236     root_regions()->scan_finished();
1237   }
1238 }
1239 
1240 void ConcurrentMark::markFromRoots() {
1241   // we might be tempted to assert that:
1242   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1243   //        "inconsistent argument?");
1244   // However that wouldn't be right, because it's possible that
1245   // a safepoint is indeed in progress as a younger generation
1246   // stop-the-world GC happens even as we mark in this generation.
1247 
1248   _restart_for_overflow = false;
1249   force_overflow_conc()->init();
1250 
1251   // _g1h has _n_par_threads
1252   _parallel_marking_threads = calc_parallel_marking_threads();
1253   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1254     "Maximum number of marking threads exceeded");
1255 
1256   uint active_workers = MAX2(1U, parallel_marking_threads());
1257 
1258   // Parallel task terminator is set in "set_concurrency_and_phase()"
1259   set_concurrency_and_phase(active_workers, true /* concurrent */);
1260 
1261   CMConcurrentMarkingTask markingTask(this, cmThread());
1262   _parallel_workers->set_active_workers(active_workers);
1263   // Don't set _n_par_threads because it affects MT in process_roots()
 1264   // and the decisions on that MT processing are made elsewhere.
1265   assert(_parallel_workers->active_workers() > 0, "Should have been set");
1266   _parallel_workers->run_task(&markingTask);
1267   print_stats();
1268 }
1269 
1270 // Helper class to get rid of some boilerplate code.
1271 class G1CMTraceTime : public GCTraceTime {
1272   static bool doit_and_prepend(bool doit) {
1273     if (doit) {
1274       gclog_or_tty->put(' ');
1275     }
1276     return doit;
1277   }
1278 
1279  public:
1280   G1CMTraceTime(const char* title, bool doit)
1281     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
1282         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
1283   }
1284 };
1285 
1286 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1287   // world is stopped at this checkpoint
1288   assert(SafepointSynchronize::is_at_safepoint(),
1289          "world should be stopped");
1290 
1291   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1292 
1293   // If a full collection has happened, we shouldn't do this.
1294   if (has_aborted()) {
1295     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1296     return;
1297   }
1298 
1299   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1300 
1301   if (VerifyDuringGC) {
1302     HandleMark hm;  // handle scope
1303     Universe::heap()->prepare_for_verify();
1304     Universe::verify(VerifyOption_G1UsePrevMarking,
1305                      " VerifyDuringGC:(before)");
1306   }
1307   g1h->check_bitmaps("Remark Start");
1308 
1309   G1CollectorPolicy* g1p = g1h->g1_policy();
1310   g1p->record_concurrent_mark_remark_start();
1311 
1312   double start = os::elapsedTime();
1313 
1314   checkpointRootsFinalWork();
1315 
1316   double mark_work_end = os::elapsedTime();
1317 
1318   weakRefsWork(clear_all_soft_refs);
1319 
1320   if (has_overflown()) {
1321     // Oops.  We overflowed.  Restart concurrent marking.
1322     _restart_for_overflow = true;
1323     if (G1TraceMarkStackOverflow) {
1324       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1325     }
1326 
1327     // Verify the heap w.r.t. the previous marking bitmap.
1328     if (VerifyDuringGC) {
1329       HandleMark hm;  // handle scope
1330       Universe::heap()->prepare_for_verify();
1331       Universe::verify(VerifyOption_G1UsePrevMarking,
1332                        " VerifyDuringGC:(overflow)");
1333     }
1334 
1335     // Clear the marking state because we will be restarting
1336     // marking due to overflowing the global mark stack.
1337     reset_marking_state();
1338   } else {
1339     {
1340       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1341 
1342       // Aggregate the per-task counting data that we have accumulated
1343       // while marking.
1344       aggregate_count_data();
1345     }
1346 
1347     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1348     // We're done with marking.
 1349     // This is the end of the marking cycle; we expect all
 1350     // threads to have SATB queues with active set to true.
1351     satb_mq_set.set_active_all_threads(false, /* new active value */
1352                                        true /* expected_active */);
1353 
1354     if (VerifyDuringGC) {
1355       HandleMark hm;  // handle scope
1356       Universe::heap()->prepare_for_verify();
1357       Universe::verify(VerifyOption_G1UseNextMarking,
1358                        " VerifyDuringGC:(after)");
1359     }
1360     g1h->check_bitmaps("Remark End");
1361     assert(!restart_for_overflow(), "sanity");
1362     // Completely reset the marking state since marking completed
1363     set_non_marking_state();
1364   }
1365 
1366   // Expand the marking stack, if we have to and if we can.
1367   if (_markStack.should_expand()) {
1368     _markStack.expand();
1369   }
1370 
1371   // Statistics
1372   double now = os::elapsedTime();
1373   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1374   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1375   _remark_times.add((now - start) * 1000.0);
1376 
1377   g1p->record_concurrent_mark_remark_end();
1378 
1379   G1CMIsAliveClosure is_alive(g1h);
1380   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1381 }
1382 
1383 // Base class of the closures that finalize and verify the
1384 // liveness counting data.
1385 class CMCountDataClosureBase: public HeapRegionClosure {
1386 protected:
1387   G1CollectedHeap* _g1h;
1388   ConcurrentMark* _cm;
1389   CardTableModRefBS* _ct_bs;
1390 
1391   BitMap* _region_bm;
1392   BitMap* _card_bm;
1393 
1394   // Takes a region that's not empty (i.e., it has at least one
 1395   // live object in it) and sets its corresponding bit on the region
1396   // bitmap to 1. If the region is "starts humongous" it will also set
1397   // to 1 the bits on the region bitmap that correspond to its
1398   // associated "continues humongous" regions.
1399   void set_bit_for_region(HeapRegion* hr) {
1400     assert(!hr->is_continues_humongous(), "should have filtered those out");
1401 
1402     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1403     if (!hr->is_starts_humongous()) {
1404       // Normal (non-humongous) case: just set the bit.
1405       _region_bm->par_at_put(index, true);
1406     } else {
1407       // Starts humongous case: calculate how many regions are part of
1408       // this humongous region and then set the bit range.
1409       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1410       _region_bm->par_at_put_range(index, end_index, true);
1411     }
1412   }
1413 
1414 public:
1415   CMCountDataClosureBase(G1CollectedHeap* g1h,
1416                          BitMap* region_bm, BitMap* card_bm):
1417     _g1h(g1h), _cm(g1h->concurrent_mark()),
1418     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1419     _region_bm(region_bm), _card_bm(card_bm) { }
1420 };
1421 
1422 // Closure that calculates the # live objects per region. Used
1423 // for verification purposes during the cleanup pause.
1424 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1425   CMBitMapRO* _bm;
1426   size_t _region_marked_bytes;
1427 
1428 public:
1429   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1430                          BitMap* region_bm, BitMap* card_bm) :
1431     CMCountDataClosureBase(g1h, region_bm, card_bm),
1432     _bm(bm), _region_marked_bytes(0) { }
1433 
1434   bool doHeapRegion(HeapRegion* hr) {
1435 
1436     if (hr->is_continues_humongous()) {
1437       // We will ignore these here and process them when their
1438       // associated "starts humongous" region is processed (see
1439       // set_bit_for_region()). Note that we cannot rely on their
1440       // associated "starts humongous" region to have its bit set to
1441       // 1 since, due to the region chunking in the parallel region
1442       // iteration, a "continues humongous" region might be visited
1443       // before its associated "starts humongous" region.
1444       return false;
1445     }
1446 
1447     HeapWord* ntams = hr->next_top_at_mark_start();
1448     HeapWord* start = hr->bottom();
1449 
1450     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1451            err_msg("Preconditions not met - "
1452                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1453                    p2i(start), p2i(ntams), p2i(hr->end())));
1454 
1455     // Find the first marked object at or after "start".
1456     start = _bm->getNextMarkedWordAddress(start, ntams);
1457 
1458     size_t marked_bytes = 0;
1459 
1460     while (start < ntams) {
1461       oop obj = oop(start);
1462       int obj_sz = obj->size();
1463       HeapWord* obj_end = start + obj_sz;
1464 
1465       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1466       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1467 
1468       // Note: if we're looking at the last region in heap - obj_end
1469       // could be actually just beyond the end of the heap; end_idx
1470       // will then correspond to a (non-existent) card that is also
1471       // just beyond the heap.
1472       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1473         // end of object is not card aligned - increment to cover
1474         // all the cards spanned by the object
1475         end_idx += 1;
1476       }
1477 
1478       // Set the bits in the card BM for the cards spanned by this object.
1479       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1480 
1481       // Add the size of this object to the number of marked bytes.
1482       marked_bytes += (size_t)obj_sz * HeapWordSize;
1483 
1484       // Find the next marked object after this one.
1485       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1486     }
1487 
1488     // Mark the allocated-since-marking portion...
1489     HeapWord* top = hr->top();
1490     if (ntams < top) {
1491       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1492       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1493 
1494       // Note: if we're looking at the last region in heap - top
1495       // could be actually just beyond the end of the heap; end_idx
1496       // will then correspond to a (non-existent) card that is also
1497       // just beyond the heap.
1498       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1499         // top is not card aligned - increment to cover
1500         // all the cards spanned by the range [ntams, top)
1501         end_idx += 1;
1502       }
1503       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1504 
1505       // This definitely means the region has live objects.
1506       set_bit_for_region(hr);
1507     }
1508 
1509     // Update the live region bitmap.
1510     if (marked_bytes > 0) {
1511       set_bit_for_region(hr);
1512     }
1513 
1514     // Set the marked bytes for the current region so that
1515     // it can be queried by a calling verification routine
1516     _region_marked_bytes = marked_bytes;
1517 
1518     return false;
1519   }
1520 
1521   size_t region_marked_bytes() const { return _region_marked_bytes; }
1522 };
1523 
1524 // Heap region closure used for verifying the counting data
1525 // that was accumulated concurrently and aggregated during
1526 // the remark pause. This closure is applied to the heap
1527 // regions during the STW cleanup pause.
1528 
1529 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1530   G1CollectedHeap* _g1h;
1531   ConcurrentMark* _cm;
1532   CalcLiveObjectsClosure _calc_cl;
1533   BitMap* _region_bm;   // Region BM to be verified
1534   BitMap* _card_bm;     // Card BM to be verified
1535   bool _verbose;        // verbose output?
1536 
1537   BitMap* _exp_region_bm; // Expected Region BM values
1538   BitMap* _exp_card_bm;   // Expected card BM values
1539 
1540   int _failures;
1541 
1542 public:
1543   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1544                                 BitMap* region_bm,
1545                                 BitMap* card_bm,
1546                                 BitMap* exp_region_bm,
1547                                 BitMap* exp_card_bm,
1548                                 bool verbose) :
1549     _g1h(g1h), _cm(g1h->concurrent_mark()),
1550     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1551     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1552     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1553     _failures(0) { }
1554 
1555   int failures() const { return _failures; }
1556 
1557   bool doHeapRegion(HeapRegion* hr) {
1558     if (hr->is_continues_humongous()) {
1559       // We will ignore these here and process them when their
1560       // associated "starts humongous" region is processed (see
1561       // set_bit_for_region()). Note that we cannot rely on their
1562       // associated "starts humongous" region to have its bit set to
1563       // 1 since, due to the region chunking in the parallel region
1564       // iteration, a "continues humongous" region might be visited
1565       // before its associated "starts humongous" region.
1566       return false;
1567     }
1568 
1569     int failures = 0;
1570 
1571     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1572     // this region and set the corresponding bits in the expected region
1573     // and card bitmaps.
1574     bool res = _calc_cl.doHeapRegion(hr);
1575     assert(res == false, "should be continuing");
1576 
1577     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1578                     Mutex::_no_safepoint_check_flag);
1579 
1580     // Verify the marked bytes for this region.
1581     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1582     size_t act_marked_bytes = hr->next_marked_bytes();
1583 
1584     // We're not OK if expected marked bytes > actual marked bytes. It means
1585     // we failed to account for some objects during the actual marking.
1586     if (exp_marked_bytes > act_marked_bytes) {
1587       if (_verbose) {
1588         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1589                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1590                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1591       }
1592       failures += 1;
1593     }
1594 
1595     // Verify the bit, for this region, in the actual and expected
1596     // (which was just calculated) region bit maps.
1597     // We're not OK if the bit in the calculated expected region
1598     // bitmap is set and the bit in the actual region bitmap is not.
1599     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1600 
1601     bool expected = _exp_region_bm->at(index);
1602     bool actual = _region_bm->at(index);
1603     if (expected && !actual) {
1604       if (_verbose) {
1605         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1606                                "expected: %s, actual: %s",
1607                                hr->hrm_index(),
1608                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1609       }
1610       failures += 1;
1611     }
1612 
1613     // Verify that the card bit maps for the cards spanned by the current
1614     // region match. We have an error if we have a set bit in the expected
1615     // bit map and the corresponding bit in the actual bitmap is not set.
1616 
1617     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1618     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1619 
1620     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1621       expected = _exp_card_bm->at(i);
1622       actual = _card_bm->at(i);
1623 
1624       if (expected && !actual) {
1625         if (_verbose) {
1626           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1627                                  "expected: %s, actual: %s",
1628                                  hr->hrm_index(), i,
1629                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1630         }
1631         failures += 1;
1632       }
1633     }
1634 
1635     if (failures > 0 && _verbose)  {
1636       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1637                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1638                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1639                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1640     }
1641 
1642     _failures += failures;
1643 
1644     // We could stop iteration over the heap when we
1645     // find the first violating region by returning true.
1646     return false;
1647   }
1648 };
1649 
1650 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1651 protected:
1652   G1CollectedHeap* _g1h;
1653   ConcurrentMark* _cm;
1654   BitMap* _actual_region_bm;
1655   BitMap* _actual_card_bm;
1656 
1657   uint    _n_workers;
1658 
1659   BitMap* _expected_region_bm;
1660   BitMap* _expected_card_bm;
1661 
1662   int  _failures;
1663   bool _verbose;
1664 
1665   HeapRegionClaimer _hrclaimer;
1666 
1667 public:
1668   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1669                             BitMap* region_bm, BitMap* card_bm,
1670                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1671     : AbstractGangTask("G1 verify final counting"),
1672       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1673       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1674       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1675       _failures(0), _verbose(false),
1676       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1677     assert(VerifyDuringGC, "don't call this otherwise");
1678     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1679     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1680 
1681     _verbose = _cm->verbose_medium();
1682   }
1683 
1684   void work(uint worker_id) {
1685     assert(worker_id < _n_workers, "invariant");
1686 
1687     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1688                                             _actual_region_bm, _actual_card_bm,
1689                                             _expected_region_bm,
1690                                             _expected_card_bm,
1691                                             _verbose);
1692 
1693     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1694 
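         // Atomically accumulate this worker's failure count into the shared total.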
1695     Atomic::add(verify_cl.failures(), &_failures);
1696   }
1697 
1698   int failures() const { return _failures; }
1699 };
1700 
1701 // Closure that finalizes the liveness counting data.
1702 // Used during the cleanup pause.
1703 // Sets the bits corresponding to the interval [NTAMS, top)
1704 // (which contains the implicitly live objects) in the
1705 // card liveness bitmap. Also sets the bit for each region,
1706 // containing live data, in the region liveness bitmap.
1707 
1708 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1709  public:
1710   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1711                               BitMap* region_bm,
1712                               BitMap* card_bm) :
1713     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1714 
1715   bool doHeapRegion(HeapRegion* hr) {
1716 
1717     if (hr->is_continues_humongous()) {
1718       // We will ignore these here and process them when their
1719       // associated "starts humongous" region is processed (see
1720       // set_bit_for_region()). Note that we cannot rely on their
1721       // associated "starts humongous" region to have its bit set to
1722       // 1 since, due to the region chunking in the parallel region
1723       // iteration, a "continues humongous" region might be visited
1724       // before its associated "starts humongous" region.
1725       return false;
1726     }
1727 
1728     HeapWord* ntams = hr->next_top_at_mark_start();
1729     HeapWord* top   = hr->top();
1730 
1731     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1732 
1733     // Mark the allocated-since-marking portion...
1734     if (ntams < top) {
1735       // This definitely means the region has live objects.
1736       set_bit_for_region(hr);
1737 
1738       // Now set the bits in the card bitmap for [ntams, top)
1739       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1740       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1741 
1742       // Note: if we're looking at the last region in heap - top
1743       // could be actually just beyond the end of the heap; end_idx
1744       // will then correspond to a (non-existent) card that is also
1745       // just beyond the heap.
1746       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1747         // top is not card aligned - increment to cover
1748         // all the cards spanned by the range [ntams, top)
1749         end_idx += 1;
1750       }
1751 
1752       assert(end_idx <= _card_bm->size(),
1753              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1754                      end_idx, _card_bm->size()));
1755       assert(start_idx < _card_bm->size(),
1756              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1757                      start_idx, _card_bm->size()));
1758 
1759       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1760     }
1761 
1762     // Set the bit for the region if it contains live data
1763     if (hr->next_marked_bytes() > 0) {
1764       set_bit_for_region(hr);
1765     }
1766 
1767     return false;
1768   }
1769 };
1770 
1771 class G1ParFinalCountTask: public AbstractGangTask {
1772 protected:
1773   G1CollectedHeap* _g1h;
1774   ConcurrentMark* _cm;
1775   BitMap* _actual_region_bm;
1776   BitMap* _actual_card_bm;
1777 
1778   uint    _n_workers;
1779   HeapRegionClaimer _hrclaimer;
1780 
1781 public:
1782   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1783     : AbstractGangTask("G1 final counting"),
1784       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1785       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1786       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1787   }
1788 
1789   void work(uint worker_id) {
1790     assert(worker_id < _n_workers, "invariant");
1791 
1792     FinalCountDataUpdateClosure final_update_cl(_g1h,
1793                                                 _actual_region_bm,
1794                                                 _actual_card_bm);
1795 
1796     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1797   }
1798 };
1799 
1800 class G1ParNoteEndTask;
1801 
1802 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1803   G1CollectedHeap* _g1;
1804   size_t _max_live_bytes;
1805   uint _regions_claimed;
1806   size_t _freed_bytes;
1807   FreeRegionList* _local_cleanup_list;
1808   HeapRegionSetCount _old_regions_removed;
1809   HeapRegionSetCount _humongous_regions_removed;
1810   HRRSCleanupTask* _hrrs_cleanup_task;
1811   double _claimed_region_time;
1812   double _max_region_time;
1813 
1814 public:
1815   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1816                              FreeRegionList* local_cleanup_list,
1817                              HRRSCleanupTask* hrrs_cleanup_task) :
1818     _g1(g1),
1819     _max_live_bytes(0), _regions_claimed(0),
1820     _freed_bytes(0),
1821     _claimed_region_time(0.0), _max_region_time(0.0),
1822     _local_cleanup_list(local_cleanup_list),
1823     _old_regions_removed(),
1824     _humongous_regions_removed(),
1825     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1826 
1827   size_t freed_bytes() { return _freed_bytes; }
1828   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1829   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1830 
1831   bool doHeapRegion(HeapRegion *hr) {
1832     if (hr->is_continues_humongous()) {
1833       return false;
1834     }
1835     // We use a claim value of zero here because all regions
1836     // were claimed with value 1 in the FinalCount task.
1837     _g1->reset_gc_time_stamps(hr);
1838     double start = os::elapsedTime();
1839     _regions_claimed++;
1840     hr->note_end_of_marking();
1841     _max_live_bytes += hr->max_live_bytes();
1842 
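         // Free regions that are completely full of garbage: they have been
         // used, contain no live bytes after marking, and are not young.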
1843     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1844       _freed_bytes += hr->used();
1845       hr->set_containing_set(NULL);
1846       if (hr->is_humongous()) {
1847         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1848         _humongous_regions_removed.increment(1u, hr->capacity());
1849         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1850       } else {
1851         _old_regions_removed.increment(1u, hr->capacity());
1852         _g1->free_region(hr, _local_cleanup_list, true);
1853       }
1854     } else {
1855       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1856     }
1857 
1858     double region_time = (os::elapsedTime() - start);
1859     _claimed_region_time += region_time;
1860     if (region_time > _max_region_time) {
1861       _max_region_time = region_time;
1862     }
1863     return false;
1864   }
1865 
1866   size_t max_live_bytes() { return _max_live_bytes; }
1867   uint regions_claimed() { return _regions_claimed; }
1868   double claimed_region_time_sec() { return _claimed_region_time; }
1869   double max_region_time_sec() { return _max_region_time; }
1870 };
1871 
1872 class G1ParNoteEndTask: public AbstractGangTask {
1873   friend class G1NoteEndOfConcMarkClosure;
1874 
1875 protected:
1876   G1CollectedHeap* _g1h;
1877   size_t _max_live_bytes;
1878   size_t _freed_bytes;
1879   FreeRegionList* _cleanup_list;
1880   HeapRegionClaimer _hrclaimer;
1881 
1882 public:
1883   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1884       AbstractGangTask("G1 note end"), _g1h(g1h), _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1885   }
1886 
1887   void work(uint worker_id) {
1888     double start = os::elapsedTime();
1889     FreeRegionList local_cleanup_list("Local Cleanup List");
1890     HRRSCleanupTask hrrs_cleanup_task;
1891     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1892                                            &hrrs_cleanup_task);
1893     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1894     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1895 
1896     // Now update the lists
1897     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1898     {
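           // Serialize, across workers, the updates of the shared totals and the
           // transfer of the local cleanup list to the global cleanup list.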
1899       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1900       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1901       _max_live_bytes += g1_note_end.max_live_bytes();
1902       _freed_bytes += g1_note_end.freed_bytes();
1903 
1904       // If we iterate over the global cleanup list at the end of
1905       // cleanup to do this printing we will not guarantee to only
1906       // generate output for the newly-reclaimed regions (the list
1907       // might not be empty at the beginning of cleanup; we might
1908       // still be working on its previous contents). So we do the
1909       // printing here, before we append the new regions to the global
1910       // cleanup list.
1911 
1912       G1HRPrinter* hr_printer = _g1h->hr_printer();
1913       if (hr_printer->is_active()) {
1914         FreeRegionListIterator iter(&local_cleanup_list);
1915         while (iter.more_available()) {
1916           HeapRegion* hr = iter.get_next();
1917           hr_printer->cleanup(hr);
1918         }
1919       }
1920 
1921       _cleanup_list->add_ordered(&local_cleanup_list);
1922       assert(local_cleanup_list.is_empty(), "post-condition");
1923 
1924       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1925     }
1926   }
1927   size_t max_live_bytes() { return _max_live_bytes; }
1928   size_t freed_bytes() { return _freed_bytes; }
1929 };
1930 
1931 class G1ParScrubRemSetTask: public AbstractGangTask {
1932 protected:
1933   G1RemSet* _g1rs;
1934   BitMap* _region_bm;
1935   BitMap* _card_bm;
1936   HeapRegionClaimer _hrclaimer;
1937 
1938 public:
1939   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1940       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1941   }
1942 
1943   void work(uint worker_id) {
1944     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1945   }
1946 
1947 };
1948 
1949 void ConcurrentMark::cleanup() {
1950   // world is stopped at this checkpoint
1951   assert(SafepointSynchronize::is_at_safepoint(),
1952          "world should be stopped");
1953   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1954 
1955   // If a full collection has happened, we shouldn't do this.
1956   if (has_aborted()) {
1957     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1958     return;
1959   }
1960 
1961   g1h->verify_region_sets_optional();
1962 
1963   if (VerifyDuringGC) {
1964     HandleMark hm;  // handle scope
1965     Universe::heap()->prepare_for_verify();
1966     Universe::verify(VerifyOption_G1UsePrevMarking,
1967                      " VerifyDuringGC:(before)");
1968   }
1969   g1h->check_bitmaps("Cleanup Start");
1970 
1971   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1972   g1p->record_concurrent_mark_cleanup_start();
1973 
1974   double start = os::elapsedTime();
1975 
1976   HeapRegionRemSet::reset_for_cleanup_tasks();
1977 
1978   uint n_workers;
1979 
1980   // Do counting once more with the world stopped for good measure.
1981   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1982 
1983   g1h->set_par_threads();
1984   n_workers = g1h->n_par_threads();
1985   assert(g1h->n_par_threads() == n_workers,
1986          "Should not have been reset");
1987   g1h->workers()->run_task(&g1_par_count_task);
1988   // Done with the parallel phase so reset to 0.
1989   g1h->set_par_threads(0);
1990 
1991   if (VerifyDuringGC) {
1992     // Verify that the counting data accumulated during marking matches
1993     // that calculated by walking the marking bitmap.
1994 
1995     // Bitmaps to hold expected values
1996     BitMap expected_region_bm(_region_bm.size(), true);
1997     BitMap expected_card_bm(_card_bm.size(), true);
1998 
1999     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2000                                                  &_region_bm,
2001                                                  &_card_bm,
2002                                                  &expected_region_bm,
2003                                                  &expected_card_bm);
2004 
2005     g1h->set_par_threads((int)n_workers);
2006     g1h->workers()->run_task(&g1_par_verify_task);
2007     // Done with the parallel phase so reset to 0.
2008     g1h->set_par_threads(0);
2009 
2010     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2011   }
2012 
2013   size_t start_used_bytes = g1h->used();
2014   g1h->set_marking_complete();
2015 
2016   double count_end = os::elapsedTime();
2017   double this_final_counting_time = (count_end - start);
2018   _total_counting_time += this_final_counting_time;
2019 
2020   if (G1PrintRegionLivenessInfo) {
2021     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2022     _g1h->heap_region_iterate(&cl);
2023   }
2024 
2025   // Install newly created mark bitMap as "prev".
2026   swapMarkBitMaps();
2027 
2028   g1h->reset_gc_time_stamp();
2029 
2030   // Note end of marking in all heap regions.
2031   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
2032   g1h->set_par_threads((int)n_workers);
2033   g1h->workers()->run_task(&g1_par_note_end_task);
2034   g1h->set_par_threads(0);
2035   g1h->check_gc_time_stamps();
2036 
2037   if (!cleanup_list_is_empty()) {
2038     // The cleanup list is not empty, so we'll have to process it
2039     // concurrently. Notify anyone else that might be wanting free
2040     // regions that there will be more free regions coming soon.
2041     g1h->set_free_regions_coming();
2042   }
2043 
2044   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
2045   // call below, since scrubbing affects the metric by which we sort the heap regions.
2046   if (G1ScrubRemSets) {
2047     double rs_scrub_start = os::elapsedTime();
2048     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
2049     g1h->set_par_threads((int)n_workers);
2050     g1h->workers()->run_task(&g1_par_scrub_rs_task);
2051     g1h->set_par_threads(0);
2052 
2053     double rs_scrub_end = os::elapsedTime();
2054     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2055     _total_rs_scrub_time += this_rs_scrub_time;
2056   }
2057 
2058   // this will also free any regions totally full of garbage objects,
2059   // and sort the regions.
2060   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2061 
2062   // Statistics.
2063   double end = os::elapsedTime();
2064   _cleanup_times.add((end - start) * 1000.0);
2065 
2066   if (G1Log::fine()) {
2067     g1h->print_size_transition(gclog_or_tty,
2068                                start_used_bytes,
2069                                g1h->used(),
2070                                g1h->capacity());
2071   }
2072 
2073   // Clean up will have freed any regions completely full of garbage.
2074   // Update the soft reference policy with the new heap occupancy.
2075   Universe::update_heap_info_at_gc();
2076 
2077   if (VerifyDuringGC) {
2078     HandleMark hm;  // handle scope
2079     Universe::heap()->prepare_for_verify();
2080     Universe::verify(VerifyOption_G1UsePrevMarking,
2081                      " VerifyDuringGC:(after)");
2082   }
2083 
2084   g1h->check_bitmaps("Cleanup End");
2085 
2086   g1h->verify_region_sets_optional();
2087 
2088   // We need to make this be a "collection" so any collection pause that
2089   // races with it goes around and waits for completeCleanup to finish.
2090   g1h->increment_total_collections();
2091 
2092   // Clean out dead classes and update Metaspace sizes.
2093   if (ClassUnloadingWithConcurrentMark) {
2094     ClassLoaderDataGraph::purge();
2095   }
2096   MetaspaceGC::compute_new_size();
2097 
2098   // We reclaimed old regions so we should calculate the sizes to make
2099   // sure we update the old gen/space data.
2100   g1h->g1mm()->update_sizes();
2101 
2102   g1h->trace_heap_after_concurrent_cycle();
2103 }
2104 
2105 void ConcurrentMark::completeCleanup() {
2106   if (has_aborted()) return;
2107 
2108   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2109 
2110   _cleanup_list.verify_optional();
2111   FreeRegionList tmp_free_list("Tmp Free List");
2112 
2113   if (G1ConcRegionFreeingVerbose) {
2114     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2115                            "cleanup list has %u entries",
2116                            _cleanup_list.length());
2117   }
2118 
2119   // No one else should be accessing the _cleanup_list at this point,
2120   // so it is not necessary to take any locks
2121   while (!_cleanup_list.is_empty()) {
2122     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2123     assert(hr != NULL, "Got NULL from a non-empty list");
2124     hr->par_clear();
2125     tmp_free_list.add_ordered(hr);
2126 
2127     // Instead of adding one region at a time to the secondary_free_list,
2128     // we accumulate them in the local list and move them a few at a
2129     // time. This also cuts down on the number of notify_all() calls
2130     // we do during this process. We'll also append the local list when
2131     // _cleanup_list is empty (which means we just removed the last
2132     // region from the _cleanup_list).
2133     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2134         _cleanup_list.is_empty()) {
2135       if (G1ConcRegionFreeingVerbose) {
2136         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2137                                "appending %u entries to the secondary_free_list, "
2138                                "cleanup list still has %u entries",
2139                                tmp_free_list.length(),
2140                                _cleanup_list.length());
2141       }
2142 
2143       {
2144         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2145         g1h->secondary_free_list_add(&tmp_free_list);
2146         SecondaryFreeList_lock->notify_all();
2147       }
2148 
2149       if (G1StressConcRegionFreeing) {
2150         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2151           os::sleep(Thread::current(), (jlong) 1, false);
2152         }
2153       }
2154     }
2155   }
2156   assert(tmp_free_list.is_empty(), "post-condition");
2157 }
2158 
2159 // Supporting Object and Oop closures for reference discovery
2160 // and processing during marking
2161 
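     // An object is considered live if its address is non-NULL and it is either
     // outside the G1 reserved heap or not regarded as dead (ill) by the
     // marking currently in progress.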
2162 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2163   HeapWord* addr = (HeapWord*)obj;
2164   return addr != NULL &&
2165          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2166 }
2167 
2168 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2169 // Uses the CMTask associated with a worker thread (for serial reference
2170 // processing the CMTask for worker 0 is used) to preserve (mark) and
2171 // trace referent objects.
2172 //
2173 // Using the CMTask and embedded local queues avoids having the worker
2174 // threads operate on the global mark stack. This reduces the risk
2175 // of overflowing the stack - which we would rather avoid at this late
2176 // stage. Using the tasks' local queues also removes the potential
2177 // for the workers to interfere with each other, which could occur if
2178 // they operated on the global stack.
2179 
2180 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2181   ConcurrentMark* _cm;
2182   CMTask*         _task;
2183   int             _ref_counter_limit;
2184   int             _ref_counter;
2185   bool            _is_serial;
2186  public:
2187   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2188     _cm(cm), _task(task), _is_serial(is_serial),
2189     _ref_counter_limit(G1RefProcDrainInterval) {
2190     assert(_ref_counter_limit > 0, "sanity");
2191     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2192     _ref_counter = _ref_counter_limit;
2193   }
2194 
2195   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2196   virtual void do_oop(      oop* p) { do_oop_work(p); }
2197 
2198   template <class T> void do_oop_work(T* p) {
2199     if (!_cm->has_overflown()) {
2200       oop obj = oopDesc::load_decode_heap_oop(p);
2201       if (_cm->verbose_high()) {
2202         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2203                                "*"PTR_FORMAT" = "PTR_FORMAT,
2204                                _task->worker_id(), p2i(p), p2i((void*) obj));
2205       }
2206 
2207       _task->deal_with_reference(obj);
2208       _ref_counter--;
2209 
2210       if (_ref_counter == 0) {
2211         // We have dealt with _ref_counter_limit references, pushing them
2212         // and objects reachable from them on to the local stack (and
2213         // possibly the global stack). Call CMTask::do_marking_step() to
2214         // process these entries.
2215         //
2216         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2217         // there's nothing more to do (i.e. we're done with the entries that
2218         // were pushed as a result of the CMTask::deal_with_reference() calls
2219         // above) or we overflow.
2220         //
2221         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2222         // flag while there may still be some work to do. (See the comment at
2223         // the beginning of CMTask::do_marking_step() for those conditions -
2224         // one of which is reaching the specified time target.) It is only
2225         // when CMTask::do_marking_step() returns without setting the
2226         // has_aborted() flag that the marking step has completed.
2227         do {
2228           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2229           _task->do_marking_step(mark_step_duration_ms,
2230                                  false      /* do_termination */,
2231                                  _is_serial);
2232         } while (_task->has_aborted() && !_cm->has_overflown());
2233         _ref_counter = _ref_counter_limit;
2234       }
2235     } else {
2236       if (_cm->verbose_high()) {
2237          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2238       }
2239     }
2240   }
2241 };
2242 
2243 // 'Drain' oop closure used by both serial and parallel reference processing.
2244 // Uses the CMTask associated with a given worker thread (for serial
2245 // reference processing the CMTask for worker 0 is used). Calls the
2246 // do_marking_step routine, with an unbelievably large timeout value,
2247 // to drain the marking data structures of the remaining entries
2248 // added by the 'keep alive' oop closure above.
2249 
2250 class G1CMDrainMarkingStackClosure: public VoidClosure {
2251   ConcurrentMark* _cm;
2252   CMTask*         _task;
2253   bool            _is_serial;
2254  public:
2255   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2256     _cm(cm), _task(task), _is_serial(is_serial) {
2257     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2258   }
2259 
2260   void do_void() {
2261     do {
2262       if (_cm->verbose_high()) {
2263         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2264                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2265       }
2266 
2267       // We call CMTask::do_marking_step() to completely drain the local
2268       // and global marking stacks of entries pushed by the 'keep alive'
2269       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2270       //
2271       // CMTask::do_marking_step() is called in a loop, which we'll exit
2272       // if there's nothing more to do (i.e. we've completely drained the
2273       // entries that were pushed as a result of applying the 'keep alive'
2274       // closure to the entries on the discovered ref lists) or we overflow
2275       // the global marking stack.
2276       //
2277       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2278       // flag while there may still be some work to do. (See the comment at
2279       // the beginning of CMTask::do_marking_step() for those conditions -
2280       // one of which is reaching the specified time target.) It is only
2281       // when CMTask::do_marking_step() returns without setting the
2282       // has_aborted() flag that the marking step has completed.
2283 
2284       _task->do_marking_step(1000000000.0 /* something very large */,
2285                              true         /* do_termination */,
2286                              _is_serial);
2287     } while (_task->has_aborted() && !_cm->has_overflown());
2288   }
2289 };
2290 
2291 // Implementation of AbstractRefProcTaskExecutor for parallel
2292 // reference processing at the end of G1 concurrent marking
2293 
2294 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2295 private:
2296   G1CollectedHeap* _g1h;
2297   ConcurrentMark*  _cm;
2298   WorkGang*        _workers;
2299   int              _active_workers;
2300 
2301 public:
2302   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2303                         ConcurrentMark* cm,
2304                         WorkGang* workers,
2305                         int n_workers) :
2306     _g1h(g1h), _cm(cm),
2307     _workers(workers), _active_workers(n_workers) { }
2308 
2309   // Executes the given task using concurrent marking worker threads.
2310   virtual void execute(ProcessTask& task);
2311   virtual void execute(EnqueueTask& task);
2312 };
2313 
2314 class G1CMRefProcTaskProxy: public AbstractGangTask {
2315   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2316   ProcessTask&     _proc_task;
2317   G1CollectedHeap* _g1h;
2318   ConcurrentMark*  _cm;
2319 
2320 public:
2321   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2322                      G1CollectedHeap* g1h,
2323                      ConcurrentMark* cm) :
2324     AbstractGangTask("Process reference objects in parallel"),
2325     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2326     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2327     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2328   }
2329 
2330   virtual void work(uint worker_id) {
2331     ResourceMark rm;
2332     HandleMark hm;
2333     CMTask* task = _cm->task(worker_id);
2334     G1CMIsAliveClosure g1_is_alive(_g1h);
2335     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2336     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2337 
2338     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2339   }
2340 };
2341 
2342 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2343   assert(_workers != NULL, "Need parallel worker threads.");
2344   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2345 
2346   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2347 
2348   // We need to reset the concurrency level before each
2349   // proxy task execution, so that the termination protocol
2350   // and overflow handling in CMTask::do_marking_step() knows
2351   // how many workers to wait for.
2352   _cm->set_concurrency(_active_workers);
2353   _g1h->set_par_threads(_active_workers);
2354   _workers->run_task(&proc_task_proxy);
2355   _g1h->set_par_threads(0);
2356 }
2357 
2358 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2359   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2360   EnqueueTask& _enq_task;
2361 
2362 public:
2363   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2364     AbstractGangTask("Enqueue reference objects in parallel"),
2365     _enq_task(enq_task) { }
2366 
2367   virtual void work(uint worker_id) {
2368     _enq_task.work(worker_id);
2369   }
2370 };
2371 
2372 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2373   assert(_workers != NULL, "Need parallel worker threads.");
2374   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2375 
2376   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2377 
2378   // Not strictly necessary but...
2379   //
2380   // We need to reset the concurrency level before each
2381   // proxy task execution, so that the termination protocol
2382   // and overflow handling in CMTask::do_marking_step() knows
2383   // how many workers to wait for.
2384   _cm->set_concurrency(_active_workers);
2385   _g1h->set_par_threads(_active_workers);
2386   _workers->run_task(&enq_task_proxy);
2387   _g1h->set_par_threads(0);
2388 }
2389 
2390 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2391   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2392 }
2393 
2394 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2395   if (has_overflown()) {
2396     // Skip processing the discovered references if we have
2397     // overflown the global marking stack. Reference objects
2398     // only get discovered once so it is OK to not
2399     // de-populate the discovered reference lists. We could have,
2400     // but the only benefit would be that, when marking restarts,
2401     // less reference objects are discovered.
2402     return;
2403   }
2404 
2405   ResourceMark rm;
2406   HandleMark   hm;
2407 
2408   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2409 
2410   // Is alive closure.
2411   G1CMIsAliveClosure g1_is_alive(g1h);
2412 
2413   // Inner scope to exclude the cleaning of the string and symbol
2414   // tables from the displayed time.
2415   {
2416     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2417 
2418     ReferenceProcessor* rp = g1h->ref_processor_cm();
2419 
2420     // See the comment in G1CollectedHeap::ref_processing_init()
2421     // about how reference processing currently works in G1.
2422 
2423     // Set the soft reference policy
2424     rp->setup_policy(clear_all_soft_refs);
2425     assert(_markStack.isEmpty(), "mark stack should be empty");
2426 
2427     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2428     // in serial reference processing. Note these closures are also
2429     // used for serially processing (by the current thread) the
2430     // JNI references during parallel reference processing.
2431     //
2432     // These closures do not need to synchronize with the worker
2433     // threads involved in parallel reference processing as these
2434     // instances are executed serially by the current thread (e.g.
2435     // reference processing is not multi-threaded and is thus
2436     // performed by the current thread instead of a gang worker).
2437     //
2438     // The gang tasks involved in parallel reference processing create
2439     // their own instances of these closures, which do their own
2440     // synchronization among themselves.
2441     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2442     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2443 
2444     // We need at least one active thread. If reference processing
2445     // is not multi-threaded we use the current (VMThread) thread,
2446     // otherwise we use the work gang from the G1CollectedHeap and
2447     // we utilize all the worker threads we can.
2448     bool processing_is_mt = rp->processing_is_mt();
2449     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2450     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2451 
2452     // Parallel processing task executor.
2453     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2454                                               g1h->workers(), active_workers);
2455     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2456 
2457     // Set the concurrency level. The phase was already set prior to
2458     // executing the remark task.
2459     set_concurrency(active_workers);
2460 
2461     // Set the degree of MT processing here.  If the discovery was done MT,
2462     // the number of threads involved during discovery could differ from
2463     // the number of active workers.  This is OK as long as the discovered
2464     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2465     rp->set_active_mt_degree(active_workers);
2466 
2467     // Process the weak references.
2468     const ReferenceProcessorStats& stats =
2469         rp->process_discovered_references(&g1_is_alive,
2470                                           &g1_keep_alive,
2471                                           &g1_drain_mark_stack,
2472                                           executor,
2473                                           g1h->gc_timer_cm(),
2474                                           concurrent_gc_id());
2475     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2476 
2477     // The do_oop work routines of the keep_alive and drain_marking_stack
2478     // oop closures will set the has_overflown flag if we overflow the
2479     // global marking stack.
2480 
2481     assert(_markStack.overflow() || _markStack.isEmpty(),
2482             "mark stack should be empty (unless it overflowed)");
2483 
2484     if (_markStack.overflow()) {
2485       // This should have been done already when we tried to push an
2486       // entry on to the global mark stack. But let's do it again.
2487       set_has_overflown();
2488     }
2489 
2490     assert(rp->num_q() == active_workers, "Number of discovered reference queues should match the number of active workers");
2491 
2492     rp->enqueue_discovered_references(executor);
2493 
2494     rp->verify_no_references_recorded();
2495     assert(!rp->discovery_enabled(), "Post condition");
2496   }
2497 
2498   if (has_overflown()) {
2499     // We can not trust g1_is_alive if the marking stack overflowed
2500     return;
2501   }
2502 
2503   assert(_markStack.isEmpty(), "Marking should have completed");
2504 
2505   // Unload Klasses, String, Symbols, Code Cache, etc.
2506   {
2507     G1CMTraceTime trace("Unloading", G1Log::finer());
2508 
2509     if (ClassUnloadingWithConcurrentMark) {
2510       bool purged_classes;
2511 
2512       {
2513         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2514         purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
2515       }
2516 
2517       {
2518         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2519         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2520       }
2521     }
2522 
2523     if (G1StringDedup::is_enabled()) {
2524       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2525       G1StringDedup::unlink(&g1_is_alive);
2526     }
2527   }
2528 }
2529 
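     // Exchange the roles of the prev and next mark bitmaps: after a completed
     // marking cycle the "next" bitmap holds the current liveness information and
     // becomes the new "prev" bitmap, while the old "prev" bitmap will be cleared
     // and reused for the next cycle.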
2530 void ConcurrentMark::swapMarkBitMaps() {
2531   CMBitMapRO* temp = _prevMarkBitMap;
2532   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2533   _nextMarkBitMap  = (CMBitMap*)  temp;
2534 }
2535 
2536 class CMObjectClosure;
2537 
2538 // Closure for iterating over objects, currently only used for
2539 // processing SATB buffers.
2540 class CMObjectClosure : public ObjectClosure {
2541 private:
2542   CMTask* _task;
2543 
2544 public:
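       // Each object taken from a SATB buffer is handed to the owning task,
       // which marks it and, if needed, pushes it for further scanning.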
2545   void do_object(oop obj) {
2546     _task->deal_with_reference(obj);
2547   }
2548 
2549   CMObjectClosure(CMTask* task) : _task(task) { }
2550 };
2551 
2552 class G1RemarkThreadsClosure : public ThreadClosure {
2553   CMObjectClosure _cm_obj;
2554   G1CMOopClosure _cm_cl;
2555   MarkingCodeBlobClosure _code_cl;
2556   int _thread_parity;
2557 
2558  public:
2559   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2560     _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2561     _thread_parity(SharedHeap::heap()->strong_roots_parity()) {}
2562 
2563   void do_thread(Thread* thread) {
2564     if (thread->is_Java_thread()) {
2565       if (thread->claim_oops_do(true, _thread_parity)) {
2566         JavaThread* jt = (JavaThread*)thread;
2567 
2568         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking
2569         // however, oops reachable from nmethods have very complex lifecycles:
2570         // * Alive if on the stack of an executing method
2571         // * Weakly reachable otherwise
2572         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2573         // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2574         jt->nmethods_do(&_code_cl);
2575 
2576         jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
2577       }
2578     } else if (thread->is_VM_thread()) {
2579       if (thread->claim_oops_do(true, _thread_parity)) {
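             // The VM thread does not have its own SATB queue or nmethods to scan;
             // drain the shared SATB queue instead.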
2580         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
2581       }
2582     }
2583   }
2584 };
2585 
2586 class CMRemarkTask: public AbstractGangTask {
2587 private:
2588   ConcurrentMark* _cm;
2589 public:
2590   void work(uint worker_id) {
2591     // Since all available tasks are actually started, we should
2592     // only proceed if we're supposed to be active.
2593     if (worker_id < _cm->active_tasks()) {
2594       CMTask* task = _cm->task(worker_id);
2595       task->record_start_time();
2596       {
2597         ResourceMark rm;
2598         HandleMark hm;
2599 
2600         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2601         Threads::threads_do(&threads_f);
2602       }
2603 
2604       do {
2605         task->do_marking_step(1000000000.0 /* something very large */,
2606                               true         /* do_termination       */,
2607                               false        /* is_serial            */);
2608       } while (task->has_aborted() && !_cm->has_overflown());
2609       // If we overflow, then we do not want to restart. We instead
2610       // want to abort remark and do concurrent marking again.
2611       task->record_end_time();
2612     }
2613   }
2614 
2615   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2616     AbstractGangTask("Par Remark"), _cm(cm) {
2617     _cm->terminator()->reset_for_reuse(active_workers);
2618   }
2619 };
2620 
2621 void ConcurrentMark::checkpointRootsFinalWork() {
2622   ResourceMark rm;
2623   HandleMark   hm;
2624   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2625 
2626   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2627 
2628   g1h->ensure_parsability(false);
2629 
2630   G1CollectedHeap::StrongRootsScope srs(g1h);
2631   // this is remark, so we'll use up all active threads
2632   uint active_workers = g1h->workers()->active_workers();
2633   if (active_workers == 0) {
2634     assert(active_workers > 0, "Should have been set earlier");
2635     active_workers = (uint) ParallelGCThreads;
2636     g1h->workers()->set_active_workers(active_workers);
2637   }
2638   set_concurrency_and_phase(active_workers, false /* concurrent */);
2639   // Leave _parallel_marking_threads at its
2640   // value originally calculated in the ConcurrentMark
2641   // constructor and pass the number of active workers
2642   // through the gang in the task.
2643 
2644   CMRemarkTask remarkTask(this, active_workers);
2645   // We will start all available threads, even if we decide that the
2646   // active_workers will be fewer. The extra ones will just bail out
2647   // immediately.
2648   g1h->set_par_threads(active_workers);
2649   g1h->workers()->run_task(&remarkTask);
2650   g1h->set_par_threads(0);
2651 
2652   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
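       // At this point either marking has overflowed (and will be restarted by
       // the concurrent mark thread) or all completed SATB buffers must have
       // been processed.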
2653   guarantee(has_overflown() ||
2654             satb_mq_set.completed_buffers_num() == 0,
2655             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2656                     BOOL_TO_STR(has_overflown()),
2657                     satb_mq_set.completed_buffers_num()));
2658 
2659   print_stats();
2660 }
2661 
2662 #ifndef PRODUCT
2663 
2664 class PrintReachableOopClosure: public OopClosure {
2665 private:
2666   G1CollectedHeap* _g1h;
2667   outputStream*    _out;
2668   VerifyOption     _vo;
2669   bool             _all;
2670 
2671 public:
2672   PrintReachableOopClosure(outputStream* out,
2673                            VerifyOption  vo,
2674                            bool          all) :
2675     _g1h(G1CollectedHeap::heap()),
2676     _out(out), _vo(vo), _all(all) { }
2677 
2678   void do_oop(narrowOop* p) { do_oop_work(p); }
2679   void do_oop(      oop* p) { do_oop_work(p); }
2680 
2681   template <class T> void do_oop_work(T* p) {
2682     oop         obj = oopDesc::load_decode_heap_oop(p);
2683     const char* str = NULL;
2684     const char* str2 = "";
2685 
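         // Annotation legend: "" = NULL reference, " O" = outside the G1 heap,
         // " >" = allocated since marking started (above TAMS), " M" = marked,
         // " NOT" = in the heap but neither above TAMS nor marked.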
2686     if (obj == NULL) {
2687       str = "";
2688     } else if (!_g1h->is_in_g1_reserved(obj)) {
2689       str = " O";
2690     } else {
2691       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2692       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2693       bool marked = _g1h->is_marked(obj, _vo);
2694 
2695       if (over_tams) {
2696         str = " >";
2697         if (marked) {
2698           str2 = " AND MARKED";
2699         }
2700       } else if (marked) {
2701         str = " M";
2702       } else {
2703         str = " NOT";
2704       }
2705     }
2706 
2707     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2708                    p2i(p), p2i((void*) obj), str, str2);
2709   }
2710 };
2711 
2712 class PrintReachableObjectClosure : public ObjectClosure {
2713 private:
2714   G1CollectedHeap* _g1h;
2715   outputStream*    _out;
2716   VerifyOption     _vo;
2717   bool             _all;
2718   HeapRegion*      _hr;
2719 
2720 public:
2721   PrintReachableObjectClosure(outputStream* out,
2722                               VerifyOption  vo,
2723                               bool          all,
2724                               HeapRegion*   hr) :
2725     _g1h(G1CollectedHeap::heap()),
2726     _out(out), _vo(vo), _all(all), _hr(hr) { }
2727 
2728   void do_object(oop o) {
2729     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2730     bool marked = _g1h->is_marked(o, _vo);
2731     bool print_it = _all || over_tams || marked;
2732 
2733     if (print_it) {
2734       _out->print_cr(" "PTR_FORMAT"%s",
2735                      p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2736       PrintReachableOopClosure oopCl(_out, _vo, _all);
2737       o->oop_iterate_no_header(&oopCl);
2738     }
2739   }
2740 };
2741 
2742 class PrintReachableRegionClosure : public HeapRegionClosure {
2743 private:
2744   G1CollectedHeap* _g1h;
2745   outputStream*    _out;
2746   VerifyOption     _vo;
2747   bool             _all;
2748 
2749 public:
2750   bool doHeapRegion(HeapRegion* hr) {
2751     HeapWord* b = hr->bottom();
2752     HeapWord* e = hr->end();
2753     HeapWord* t = hr->top();
2754     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2755     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2756                    "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2757     _out->cr();
2758 
2759     HeapWord* from = b;
2760     HeapWord* to   = t;
2761 
2762     if (to > from) {
2763       _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2764       _out->cr();
2765       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2766       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2767       _out->cr();
2768     }
2769 
2770     return false;
2771   }
2772 
2773   PrintReachableRegionClosure(outputStream* out,
2774                               VerifyOption  vo,
2775                               bool          all) :
2776     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2777 };
2778 
2779 void ConcurrentMark::print_reachable(const char* str,
2780                                      VerifyOption vo,
2781                                      bool all) {
2782   gclog_or_tty->cr();
2783   gclog_or_tty->print_cr("== Doing heap dump... ");
2784 
2785   if (G1PrintReachableBaseFile == NULL) {
2786     gclog_or_tty->print_cr("  #### error: no base file defined");
2787     return;
2788   }
2789 
2790   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2791       (JVM_MAXPATHLEN - 1)) {
2792     gclog_or_tty->print_cr("  #### error: file name too long");
2793     return;
2794   }
2795 
2796   char file_name[JVM_MAXPATHLEN];
2797   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2798   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2799 
2800   fileStream fout(file_name);
2801   if (!fout.is_open()) {
2802     gclog_or_tty->print_cr("  #### error: could not open file");
2803     return;
2804   }
2805 
2806   outputStream* out = &fout;
2807   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2808   out->cr();
2809 
2810   out->print_cr("--- ITERATING OVER REGIONS");
2811   out->cr();
2812   PrintReachableRegionClosure rcl(out, vo, all);
2813   _g1h->heap_region_iterate(&rcl);
2814   out->cr();
2815 
2816   gclog_or_tty->print_cr("  done");
2817   gclog_or_tty->flush();
2818 }
2819 
2820 #endif // PRODUCT
2821 
2822 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2823   // Note we are overriding the read-only view of the prev map here, via
2824   // the cast.
2825   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2826 }
2827 
2828 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2829   _nextMarkBitMap->clearRange(mr);
2830 }
2831 
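     // Claims the next region to be scanned, starting at the global finger.
     // Returns NULL either when the finger has reached the end of the heap
     // or when the claimed region turned out to be empty below its NTAMS
     // (in which case the caller is expected to call claim_region() again).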
2832 HeapRegion*
2833 ConcurrentMark::claim_region(uint worker_id) {
2834   // "checkpoint" the finger
2835   HeapWord* finger = _finger;
2836 
2837   // _heap_end will not change underneath our feet; it only changes at
2838   // yield points.
2839   while (finger < _heap_end) {
2840     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2841 
2842     // Note on how this code handles humongous regions. In the
2843     // normal case the finger will reach the start of a "starts
2844     // humongous" (SH) region. Its end will either be the end of the
2845     // last "continues humongous" (CH) region in the sequence, or the
2846     // standard end of the SH region (if the SH is the only region in
2847     // the sequence). That way claim_region() will skip over the CH
2848     // regions. However, there is a subtle race between a CM thread
2849     // executing this method and a mutator thread doing a humongous
2850     // object allocation. The two are not mutually exclusive as the CM
2851     // thread does not need to hold the Heap_lock when it gets
2852     // here. So there is a chance that claim_region() will come across
2853     // a free region that's in the process of becoming a SH or a CH
2854     // region. In the former case, it will either
2855     //   a) Miss the update to the region's end, in which case it will
2856     //      visit every subsequent CH region, find their bitmaps
2857     //      empty, and do nothing, or
2858     //   b) Observe the update of the region's end (in which case
2859     //      it will skip the subsequent CH regions).
2860     // If it comes across a region that suddenly becomes CH, the
2861     // scenario will be similar to b). So, the race between
2862     // claim_region() and a humongous object allocation might force us
2863     // to do a bit of unnecessary work (due to some unnecessary bitmap
2864     // iterations) but it should not introduce any correctness issues.
2865     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2866 
2867     // heap_region_containing_raw() above may return NULL, as we always claim
2868     // up to the end of the heap. In this case, just jump to the next region.
2869     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2870 
2871     // Is the gap between reading the finger and doing the CAS too long?
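         // Atomically try to advance the global finger to 'end'. If the CAS
         // returns the value we originally read ('finger'), no other worker
         // moved it in the meantime and we have claimed [finger, end).
         // Otherwise another worker got there first and we retry below with
         // the freshly read finger.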
2872     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2873     if (res == finger && curr_region != NULL) {
2874       // we succeeded
2875       HeapWord*   bottom        = curr_region->bottom();
2876       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2877 
2878       if (verbose_low()) {
2879         gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2880                                "["PTR_FORMAT", "PTR_FORMAT"), "
2881                                "limit = "PTR_FORMAT,
2882                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2883       }
2884 
2885       // Notice that _finger == end cannot be guaranteed here, since
2886       // someone else might have moved the finger even further.
2887       assert(_finger >= end, "the finger should have moved forward");
2888 
2889       if (verbose_low()) {
2890         gclog_or_tty->print_cr("[%u] we were successful with region = "
2891                                PTR_FORMAT, worker_id, p2i(curr_region));
2892       }
2893 
2894       if (limit > bottom) {
2895         if (verbose_low()) {
2896           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2897                                  "returning it ", worker_id, p2i(curr_region));
2898         }
2899         return curr_region;
2900       } else {
2901         assert(limit == bottom,
2902                "the region limit should be at bottom");
2903         if (verbose_low()) {
2904           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2905                                  "returning NULL", worker_id, p2i(curr_region));
2906         }
2907         // we return NULL and the caller should try calling
2908         // claim_region() again.
2909         return NULL;
2910       }
2911     } else {
2912       assert(_finger > finger, "the finger should have moved forward");
2913       if (verbose_low()) {
2914         if (curr_region == NULL) {
2915           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2916                                  "global finger = "PTR_FORMAT", "
2917                                  "our finger = "PTR_FORMAT,
2918                                  worker_id, p2i(_finger), p2i(finger));
2919         } else {
2920           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2921                                  "global finger = "PTR_FORMAT", "
2922                                  "our finger = "PTR_FORMAT,
2923                                  worker_id, p2i(_finger), p2i(finger));
2924         }
2925       }
2926 
2927       // read it again
2928       finger = _finger;
2929     }
2930   }
2931 
2932   return NULL;
2933 }
2934 
2935 #ifndef PRODUCT
2936 enum VerifyNoCSetOopsPhase {
2937   VerifyNoCSetOopsStack,
2938   VerifyNoCSetOopsQueues,
2939   VerifyNoCSetOopsSATBCompleted,
2940   VerifyNoCSetOopsSATBThread
2941 };
2942 
2943 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2944 private:
2945   G1CollectedHeap* _g1h;
2946   VerifyNoCSetOopsPhase _phase;
2947   int _info;
2948 
2949   const char* phase_str() {
2950     switch (_phase) {
2951     case VerifyNoCSetOopsStack:         return "Stack";
2952     case VerifyNoCSetOopsQueues:        return "Queue";
2953     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2954     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2955     default:                            ShouldNotReachHere();
2956     }
2957     return NULL;
2958   }
2959 
2960   void do_object_work(oop obj) {
2961     guarantee(!_g1h->obj_in_cs(obj),
2962               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2963                       p2i((void*) obj), phase_str(), _info));
2964   }
2965 
2966 public:
2967   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2968 
2969   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2970     _phase = phase;
2971     _info = info;
2972   }
2973 
2974   virtual void do_oop(oop* p) {
2975     oop obj = oopDesc::load_decode_heap_oop(p);
2976     do_object_work(obj);
2977   }
2978 
2979   virtual void do_oop(narrowOop* p) {
2980     // We should not come across narrow oops while scanning marking
2981     // stacks and SATB buffers.
2982     ShouldNotReachHere();
2983   }
2984 
2985   virtual void do_object(oop obj) {
2986     do_object_work(obj);
2987   }
2988 };
2989 
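     // Verifies that none of the marking data structures (global mark stack,
     // per-task queues, SATB buffers, and the global and per-task fingers)
     // reference objects in the collection set. Debug-only and expected to
     // be called at a safepoint.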
2990 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2991                                          bool verify_enqueued_buffers,
2992                                          bool verify_thread_buffers,
2993                                          bool verify_fingers) {
2994   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2995   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2996     return;
2997   }
2998 
2999   VerifyNoCSetOopsClosure cl;
3000 
3001   if (verify_stacks) {
3002     // Verify entries on the global mark stack
3003     cl.set_phase(VerifyNoCSetOopsStack);
3004     _markStack.oops_do(&cl);
3005 
3006     // Verify entries on the task queues
3007     for (uint i = 0; i < _max_worker_id; i += 1) {
3008       cl.set_phase(VerifyNoCSetOopsQueues, i);
3009       CMTaskQueue* queue = _task_queues->queue(i);
3010       queue->oops_do(&cl);
3011     }
3012   }
3013 
3014   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
3015 
3016   // Verify entries on the enqueued SATB buffers
3017   if (verify_enqueued_buffers) {
3018     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3019     satb_qs.iterate_completed_buffers_read_only(&cl);
3020   }
3021 
3022   // Verify entries on the per-thread SATB buffers
3023   if (verify_thread_buffers) {
3024     cl.set_phase(VerifyNoCSetOopsSATBThread);
3025     satb_qs.iterate_thread_buffers_read_only(&cl);
3026   }
3027 
3028   if (verify_fingers) {
3029     // Verify the global finger
3030     HeapWord* global_finger = finger();
3031     if (global_finger != NULL && global_finger < _heap_end) {
3032       // The global finger always points to a heap region boundary. We
3033       // use heap_region_containing_raw() to get the containing region
3034       // given that the global finger could be pointing to a free region
3035       // which subsequently becomes a continues humongous region. If that
3036       // happens, heap_region_containing() will return the bottom of the
3037       // corresponding starts humongous region and the check below will
3038       // not hold any more.
3039       // Since we always iterate over all regions, we might get a NULL HeapRegion
3040       // here.
3041       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3042       guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
3043                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3044                         p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3045     }
3046 
3047     // Verify the task fingers
3048     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3049     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3050       CMTask* task = _tasks[i];
3051       HeapWord* task_finger = task->finger();
3052       if (task_finger != NULL && task_finger < _heap_end) {
3053         // See above note on the global finger verification.
3054         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3055         guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
3056                   !task_hr->in_collection_set(),
3057                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3058                           p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3059       }
3060     }
3061   }
3062 }
3063 #endif // PRODUCT
3064 
3065 // Aggregate the counting data that was constructed concurrently
3066 // with marking.
3067 class AggregateCountDataHRClosure: public HeapRegionClosure {
3068   G1CollectedHeap* _g1h;
3069   ConcurrentMark* _cm;
3070   CardTableModRefBS* _ct_bs;
3071   BitMap* _cm_card_bm;
3072   uint _max_worker_id;
3073 
3074  public:
3075   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3076                               BitMap* cm_card_bm,
3077                               uint max_worker_id) :
3078     _g1h(g1h), _cm(g1h->concurrent_mark()),
3079     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3080     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3081 
3082   bool doHeapRegion(HeapRegion* hr) {
3083     if (hr->is_continues_humongous()) {
3084       // We will ignore these here and process them when their
3085       // associated "starts humongous" region is processed.
3086       // Note that we cannot rely on their associated
3087       // "starts humongous" region to have their bit set to 1
3088       // since, due to the region chunking in the parallel region
3089       // iteration, a "continues humongous" region might be visited
3090       // before its associated "starts humongous".
3091       return false;
3092     }
3093 
3094     HeapWord* start = hr->bottom();
3095     HeapWord* limit = hr->next_top_at_mark_start();
3096     HeapWord* end = hr->end();
3097 
3098     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3099            err_msg("Preconditions not met - "
3100                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3101                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
3102                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3103 
3104     assert(hr->next_marked_bytes() == 0, "Precondition");
3105 
3106     if (start == limit) {
3107       // NTAMS of this region has not been set so nothing to do.
3108       return false;
3109     }
3110 
3111     // 'start' should be in the heap.
3112     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3113     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3114     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3115 
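         // Translate start, limit and end into indices into the liveness
         // card bitmaps (one bit per card).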
3116     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3117     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3118     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3119 
3120     // If ntams is not card aligned then we bump the card bitmap index
3121     // for limit so that we get all the cards spanned by
3122     // the object ending at ntams.
3123     // Note: if this is the last region in the heap then ntams
3124     // could actually be just beyond the end of the heap;
3125     // limit_idx will then correspond to a (non-existent) card
3126     // that is also outside the heap.
3127     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3128       limit_idx += 1;
3129     }
3130 
3131     assert(limit_idx <= end_idx, "or else use atomics");
3132 
3133     // Aggregate the "stripe" in the count data associated with hr.
3134     uint hrm_index = hr->hrm_index();
3135     size_t marked_bytes = 0;
3136 
3137     for (uint i = 0; i < _max_worker_id; i += 1) {
3138       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3139       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3140 
3141       // Fetch the marked_bytes in this region for task i and
3142       // add it to the running total for this region.
3143       marked_bytes += marked_bytes_array[hrm_index];
3144 
3145       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3146       // into the global card bitmap.
3147       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3148 
3149       while (scan_idx < limit_idx) {
3150         assert(task_card_bm->at(scan_idx) == true, "should be");
3151         _cm_card_bm->set_bit(scan_idx);
3152         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3153 
3154         // BitMap::get_next_one_offset() can handle the case when
3155         // its left_offset parameter is greater than its right_offset
3156         // parameter. It does, however, have an early exit if
3157         // left_offset == right_offset. So let's limit the value
3158         // passed in for left offset here.
3159         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3160         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3161       }
3162     }
3163 
3164     // Update the marked bytes for this region.
3165     hr->add_to_marked_bytes(marked_bytes);
3166 
3167     // Next heap region
3168     return false;
3169   }
3170 };
3171 
3172 class G1AggregateCountDataTask: public AbstractGangTask {
3173 protected:
3174   G1CollectedHeap* _g1h;
3175   ConcurrentMark* _cm;
3176   BitMap* _cm_card_bm;
3177   uint _max_worker_id;
3178   int _active_workers;
3179   HeapRegionClaimer _hrclaimer;
3180 
3181 public:
3182   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3183                            ConcurrentMark* cm,
3184                            BitMap* cm_card_bm,
3185                            uint max_worker_id,
3186                            int n_workers) :
3187       AbstractGangTask("Count Aggregation"),
3188       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3189       _max_worker_id(max_worker_id),
3190       _active_workers(n_workers),
3191       _hrclaimer(_active_workers) {
3192   }
3193 
3194   void work(uint worker_id) {
3195     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3196 
3197     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
3198   }
3199 };
3200 
3201 
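     // Runs the aggregation closure above in parallel over all heap regions,
     // using the currently active workers, and then updates the allocation
     // context statistics.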
3202 void ConcurrentMark::aggregate_count_data() {
3203   int n_workers = _g1h->workers()->active_workers();
3204 
3205   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3206                                            _max_worker_id, n_workers);
3207 
3208   _g1h->set_par_threads(n_workers);
3209   _g1h->workers()->run_task(&g1_par_agg_task);
3210   _g1h->set_par_threads(0);
3211   _g1h->allocation_context_stats().update_at_remark();
3212 }
3213 
3214 // Clear the per-worker arrays used to store the per-region counting data
3215 void ConcurrentMark::clear_all_count_data() {
3216   // Clear the global card bitmap - it will be filled during
3217   // liveness count aggregation (during remark) and the
3218   // final counting task.
3219   _card_bm.clear();
3220 
3221   // Clear the global region bitmap - it will be filled as part
3222   // of the final counting task.
3223   _region_bm.clear();
3224 
3225   uint max_regions = _g1h->max_regions();
3226   assert(_max_worker_id > 0, "uninitialized");
3227 
3228   for (uint i = 0; i < _max_worker_id; i += 1) {
3229     BitMap* task_card_bm = count_card_bitmap_for(i);
3230     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3231 
3232     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3233     assert(marked_bytes_array != NULL, "uninitialized");
3234 
3235     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3236     task_card_bm->clear();
3237   }
3238 }
3239 
3240 void ConcurrentMark::print_stats() {
3241   if (verbose_stats()) {
3242     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3243     for (size_t i = 0; i < _active_tasks; ++i) {
3244       _tasks[i]->print_stats();
3245       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3246     }
3247   }
3248 }
3249 
3250 // abandon current marking iteration due to a Full GC
3251 void ConcurrentMark::abort() {
3252   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3253   // concurrent bitmap clearing.
3254   _nextMarkBitMap->clearAll();
3255 
3256   // Note we cannot clear the previous marking bitmap here
3257   // since VerifyDuringGC verifies the objects marked during
3258   // a full GC against the previous bitmap.
3259 
3260   // Clear the liveness counting data
3261   clear_all_count_data();
3262   // Empty mark stack
3263   reset_marking_state();
3264   for (uint i = 0; i < _max_worker_id; ++i) {
3265     _tasks[i]->clear_region_fields();
3266   }
3267   _first_overflow_barrier_sync.abort();
3268   _second_overflow_barrier_sync.abort();
3269   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3270   if (!gc_id.is_undefined()) {
3271     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3272     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3273     _aborted_gc_id = gc_id;
3274   }
3275   _has_aborted = true;
3276 
3277   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3278   satb_mq_set.abandon_partial_marking();
3279   // This can be called either during or outside marking, we'll read
3280   // the expected_active value from the SATB queue set.
3281   satb_mq_set.set_active_all_threads(
3282                                  false, /* new active value */
3283                                  satb_mq_set.is_active() /* expected_active */);
3284 
3285   _g1h->trace_heap_after_concurrent_cycle();
3286   _g1h->register_concurrent_cycle_end();
3287 }
3288 
3289 const GCId& ConcurrentMark::concurrent_gc_id() {
3290   if (has_aborted()) {
3291     return _aborted_gc_id;
3292   }
3293   return _g1h->gc_tracer_cm()->gc_id();
3294 }
3295 
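     // Prints a one-line summary of the given time series: number of samples,
     // total time in seconds and average in milliseconds, followed by the
     // standard deviation and maximum when at least one sample is present.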
3296 static void print_ms_time_info(const char* prefix, const char* name,
3297                                NumberSeq& ns) {
3298   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3299                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3300   if (ns.num() > 0) {
3301     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3302                            prefix, ns.sd(), ns.maximum());
3303   }
3304 }
3305 
3306 void ConcurrentMark::print_summary_info() {
3307   gclog_or_tty->print_cr(" Concurrent marking:");
3308   print_ms_time_info("  ", "init marks", _init_times);
3309   print_ms_time_info("  ", "remarks", _remark_times);
3310   {
3311     print_ms_time_info("     ", "final marks", _remark_mark_times);
3312     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3314   }
3315   print_ms_time_info("  ", "cleanups", _cleanup_times);
3316   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3317                          _total_counting_time,
3318                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3319                           (double)_cleanup_times.num()
3320                          : 0.0));
3321   if (G1ScrubRemSets) {
3322     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3323                            _total_rs_scrub_time,
3324                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3325                             (double)_cleanup_times.num()
3326                            : 0.0));
3327   }
3328   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3329                          (_init_times.sum() + _remark_times.sum() +
3330                           _cleanup_times.sum())/1000.0);
3331   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3332                          "(%8.2f s marking).",
3333                          cmThread()->vtime_accum(),
3334                          cmThread()->vtime_mark_accum());
3335 }
3336 
3337 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3338   _parallel_workers->print_worker_threads_on(st);
3339 }
3340 
3341 void ConcurrentMark::print_on_error(outputStream* st) const {
3342   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3343       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3344   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3345   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3346 }
3347 
3348 // We take a break if someone is trying to stop the world.
3349 bool ConcurrentMark::do_yield_check(uint worker_id) {
3350   if (SuspendibleThreadSet::should_yield()) {
3351     if (worker_id == 0) {
3352       _g1h->g1_policy()->record_concurrent_pause();
3353     }
3354     SuspendibleThreadSet::yield();
3355     return true;
3356   } else {
3357     return false;
3358   }
3359 }
3360 
3361 #ifndef PRODUCT
3362 // for debugging purposes
3363 void ConcurrentMark::print_finger() {
3364   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3365                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3366   for (uint i = 0; i < _max_worker_id; ++i) {
3367     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3368   }
3369   gclog_or_tty->cr();
3370 }
3371 #endif
3372 
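     // Scans all references in the given object, which must already be marked
     // in the next bitmap, and then checks the work-based limits that drive
     // the regular clock call.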
3373 void CMTask::scan_object(oop obj) {
3374   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3375 
3376   if (_cm->verbose_high()) {
3377     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3378                            _worker_id, p2i((void*) obj));
3379   }
3380 
3381   size_t obj_size = obj->size();
3382   _words_scanned += obj_size;
3383 
3384   obj->oop_iterate(_cm_oop_closure);
3385   statsOnly( ++_objs_scanned );
3386   check_limits();
3387 }
3388 
3389 // Closure for iteration over bitmaps
3390 class CMBitMapClosure : public BitMapClosure {
3391 private:
3392   // the bitmap that is being iterated over
3393   CMBitMap*                   _nextMarkBitMap;
3394   ConcurrentMark*             _cm;
3395   CMTask*                     _task;
3396 
3397 public:
3398   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3399     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3400 
3401   bool do_bit(size_t offset) {
3402     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3403     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3404     assert(addr < _cm->finger(), "invariant");
3405 
3406     statsOnly( _task->increase_objs_found_on_bitmap() );
3407     assert(addr >= _task->finger(), "invariant");
3408 
3409     // We move the task's local finger along.
3410     _task->move_finger_to(addr);
3411 
3412     _task->scan_object(oop(addr));
3413     // we only partially drain the local queue and global stack
3414     _task->drain_local_queue(true);
3415     _task->drain_global_stack(true);
3416 
3417     // if the has_aborted flag has been raised, we need to bail out of
3418     // the iteration
3419     return !_task->has_aborted();
3420   }
3421 };
3422 
3423 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3424                                ConcurrentMark* cm,
3425                                CMTask* task)
3426   : _g1h(g1h), _cm(cm), _task(task) {
3427   assert(_ref_processor == NULL, "should be initialized to NULL");
3428 
3429   if (G1UseConcMarkReferenceProcessing) {
3430     _ref_processor = g1h->ref_processor_cm();
3431     assert(_ref_processor != NULL, "should not be NULL");
3432   }
3433 }
3434 
3435 void CMTask::setup_for_region(HeapRegion* hr) {
3436   assert(hr != NULL,
3437         "claim_region() should have filtered out NULL regions");
3438   assert(!hr->is_continues_humongous(),
3439         "claim_region() should have filtered out continues humongous regions");
3440 
3441   if (_cm->verbose_low()) {
3442     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3443                            _worker_id, p2i(hr));
3444   }
3445 
3446   _curr_region  = hr;
3447   _finger       = hr->bottom();
3448   update_region_limit();
3449 }
3450 
3451 void CMTask::update_region_limit() {
3452   HeapRegion* hr            = _curr_region;
3453   HeapWord* bottom          = hr->bottom();
3454   HeapWord* limit           = hr->next_top_at_mark_start();
3455 
3456   if (limit == bottom) {
3457     if (_cm->verbose_low()) {
3458       gclog_or_tty->print_cr("[%u] found an empty region "
3459                              "["PTR_FORMAT", "PTR_FORMAT")",
3460                              _worker_id, p2i(bottom), p2i(limit));
3461     }
3462     // The region was collected underneath our feet.
3463     // We set the finger to bottom to ensure that the bitmap
3464     // iteration that follows will not do anything.
3465     // (This is not a condition that holds when we set the region up,
3466     // as the region is not supposed to be empty in the first place.)
3467     _finger = bottom;
3468   } else if (limit >= _region_limit) {
3469     assert(limit >= _finger, "peace of mind");
3470   } else {
3471     assert(limit < _region_limit, "only way to get here");
3472     // This can happen under some pretty unusual circumstances.  An
3473     // evacuation pause empties the region underneath our feet (NTAMS
3474     // at bottom). We then do some allocation in the region (NTAMS
3475     // stays at bottom), followed by the region being used as a GC
3476     // alloc region (NTAMS will move to top() and the objects
3477     // originally below it will be grayed). All objects now marked in
3478     // the region are explicitly grayed, if below the global finger,
3479     // and we do not need in fact to scan anything else. So, we simply
3480     // set _finger to be limit to ensure that the bitmap iteration
3481     // doesn't do anything.
3482     _finger = limit;
3483   }
3484 
3485   _region_limit = limit;
3486 }
3487 
3488 void CMTask::giveup_current_region() {
3489   assert(_curr_region != NULL, "invariant");
3490   if (_cm->verbose_low()) {
3491     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3492                            _worker_id, p2i(_curr_region));
3493   }
3494   clear_region_fields();
3495 }
3496 
3497 void CMTask::clear_region_fields() {
3498   // Values for these three fields that indicate that we're not
3499   // holding on to a region.
3500   _curr_region   = NULL;
3501   _finger        = NULL;
3502   _region_limit  = NULL;
3503 }
3504 
3505 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3506   if (cm_oop_closure == NULL) {
3507     assert(_cm_oop_closure != NULL, "invariant");
3508   } else {
3509     assert(_cm_oop_closure == NULL, "invariant");
3510   }
3511   _cm_oop_closure = cm_oop_closure;
3512 }
3513 
3514 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3515   guarantee(nextMarkBitMap != NULL, "invariant");
3516 
3517   if (_cm->verbose_low()) {
3518     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3519   }
3520 
3521   _nextMarkBitMap                = nextMarkBitMap;
3522   clear_region_fields();
3523 
3524   _calls                         = 0;
3525   _elapsed_time_ms               = 0.0;
3526   _termination_time_ms           = 0.0;
3527   _termination_start_time_ms     = 0.0;
3528 
3529 #if _MARKING_STATS_
3530   _local_pushes                  = 0;
3531   _local_pops                    = 0;
3532   _local_max_size                = 0;
3533   _objs_scanned                  = 0;
3534   _global_pushes                 = 0;
3535   _global_pops                   = 0;
3536   _global_max_size               = 0;
3537   _global_transfers_to           = 0;
3538   _global_transfers_from         = 0;
3539   _regions_claimed               = 0;
3540   _objs_found_on_bitmap          = 0;
3541   _satb_buffers_processed        = 0;
3542   _steal_attempts                = 0;
3543   _steals                        = 0;
3544   _aborted                       = 0;
3545   _aborted_overflow              = 0;
3546   _aborted_cm_aborted            = 0;
3547   _aborted_yield                 = 0;
3548   _aborted_timed_out             = 0;
3549   _aborted_satb                  = 0;
3550   _aborted_termination           = 0;
3551 #endif // _MARKING_STATS_
3552 }
3553 
3554 bool CMTask::should_exit_termination() {
3555   regular_clock_call();
3556   // This is called when we are in the termination protocol. We should
3557   // quit if, for some reason, this task wants to abort or the global
3558   // stack is not empty (this means that we can get work from it).
3559   return !_cm->mark_stack_empty() || has_aborted();
3560 }
3561 
3562 void CMTask::reached_limit() {
3563   assert(_words_scanned >= _words_scanned_limit ||
3564          _refs_reached >= _refs_reached_limit,
3565          "shouldn't have been called otherwise");
3566   regular_clock_call();
3567 }
3568 
3569 void CMTask::regular_clock_call() {
3570   if (has_aborted()) return;
3571 
3572   // First, we need to recalculate the words scanned and refs reached
3573   // limits for the next clock call.
3574   recalculate_limits();
3575 
3576   // During the regular clock call we do the following:
3577 
3578   // (1) If an overflow has been flagged, then we abort.
3579   if (_cm->has_overflown()) {
3580     set_has_aborted();
3581     return;
3582   }
3583 
3584   // If we are not concurrent (i.e. we're doing remark) we don't need
3585   // to check anything else. The other steps are only needed during
3586   // the concurrent marking phase.
3587   if (!concurrent()) return;
3588 
3589   // (2) If marking has been aborted for Full GC, then we also abort.
3590   if (_cm->has_aborted()) {
3591     set_has_aborted();
3592     statsOnly( ++_aborted_cm_aborted );
3593     return;
3594   }
3595 
3596   double curr_time_ms = os::elapsedVTime() * 1000.0;
3597 
3598   // (3) If marking stats are enabled, then we update the step history.
3599 #if _MARKING_STATS_
3600   if (_words_scanned >= _words_scanned_limit) {
3601     ++_clock_due_to_scanning;
3602   }
3603   if (_refs_reached >= _refs_reached_limit) {
3604     ++_clock_due_to_marking;
3605   }
3606 
3607   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3608   _interval_start_time_ms = curr_time_ms;
3609   _all_clock_intervals_ms.add(last_interval_ms);
3610 
3611   if (_cm->verbose_medium()) {
3612     gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3613                            "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
3614                            _worker_id, last_interval_ms,
3615                            _words_scanned,
3616                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3617                            _refs_reached,
3618                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3619   }
3620 #endif // _MARKING_STATS_
3621 
3622   // (4) We check whether we should yield. If we have to, then we abort.
3623   if (SuspendibleThreadSet::should_yield()) {
3624     // We should yield. To do this we abort the task. The caller is
3625     // responsible for yielding.
3626     set_has_aborted();
3627     statsOnly( ++_aborted_yield );
3628     return;
3629   }
3630 
3631   // (5) We check whether we've reached our time quota. If we have,
3632   // then we abort.
3633   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3634   if (elapsed_time_ms > _time_target_ms) {
3635     set_has_aborted();
3636     _has_timed_out = true;
3637     statsOnly( ++_aborted_timed_out );
3638     return;
3639   }
3640 
3641   // (6) Finally, we check whether there are enough completed SATB
3642   // buffers available for processing. If there are, we abort.
3643   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3644   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3645     if (_cm->verbose_low()) {
3646       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3647                              _worker_id);
3648     }
3649     // we do need to process SATB buffers, we'll abort and restart
3650     // the marking task to do so
3651     set_has_aborted();
3652     statsOnly( ++_aborted_satb );
3653     return;
3654   }
3655 }
3656 
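     // Resets the work-based clock limits: the next regular clock call becomes
     // due after another words_scanned_period words have been scanned or
     // another refs_reached_period references have been reached.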
3657 void CMTask::recalculate_limits() {
3658   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3659   _words_scanned_limit      = _real_words_scanned_limit;
3660 
3661   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3662   _refs_reached_limit       = _real_refs_reached_limit;
3663 }
3664 
3665 void CMTask::decrease_limits() {
3666   // This is called when we believe that we're going to do an infrequent
3667   // operation which will increase the per byte scanned cost (i.e. move
3668   // entries to/from the global stack). It basically tries to decrease the
3669   // scanning limit so that the clock is called earlier.
3670 
3671   if (_cm->verbose_medium()) {
3672     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3673   }
3674 
3675   _words_scanned_limit = _real_words_scanned_limit -
3676     3 * words_scanned_period / 4;
3677   _refs_reached_limit  = _real_refs_reached_limit -
3678     3 * refs_reached_period / 4;
3679 }
3680 
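     // Moves up to global_stack_transfer_size entries from the local task
     // queue to the global mark stack in a single push. If the push fails
     // because the global stack has overflown, the task is aborted.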
3681 void CMTask::move_entries_to_global_stack() {
3682   // local array where we'll store the entries that will be popped
3683   // from the local queue
3684   oop buffer[global_stack_transfer_size];
3685 
3686   int n = 0;
3687   oop obj;
3688   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3689     buffer[n] = obj;
3690     ++n;
3691   }
3692 
3693   if (n > 0) {
3694     // we popped at least one entry from the local queue
3695 
3696     statsOnly( ++_global_transfers_to; _local_pops += n );
3697 
3698     if (!_cm->mark_stack_push(buffer, n)) {
3699       if (_cm->verbose_low()) {
3700         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3701                                _worker_id);
3702       }
3703       set_has_aborted();
3704     } else {
3705       // the transfer was successful
3706 
3707       if (_cm->verbose_medium()) {
3708         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3709                                _worker_id, n);
3710       }
3711       statsOnly( int tmp_size = _cm->mark_stack_size();
3712                  if (tmp_size > _global_max_size) {
3713                    _global_max_size = tmp_size;
3714                  }
3715                  _global_pushes += n );
3716     }
3717   }
3718 
3719   // this operation was quite expensive, so decrease the limits
3720   decrease_limits();
3721 }
3722 
3723 void CMTask::get_entries_from_global_stack() {
3724   // local array where we'll store the entries that will be popped
3725   // from the global stack.
3726   oop buffer[global_stack_transfer_size];
3727   int n;
3728   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3729   assert(n <= global_stack_transfer_size,
3730          "we should not pop more than the given limit");
3731   if (n > 0) {
3732     // yes, we did actually pop at least one entry
3733 
3734     statsOnly( ++_global_transfers_from; _global_pops += n );
3735     if (_cm->verbose_medium()) {
3736       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3737                              _worker_id, n);
3738     }
3739     for (int i = 0; i < n; ++i) {
3740       bool success = _task_queue->push(buffer[i]);
3741       // We only call this when the local queue is empty or under a
3742       // given target limit. So, we do not expect this push to fail.
3743       assert(success, "invariant");
3744     }
3745 
3746     statsOnly( int tmp_size = _task_queue->size();
3747                if (tmp_size > _local_max_size) {
3748                  _local_max_size = tmp_size;
3749                }
3750                _local_pushes += n );
3751   }
3752 
3753   // this operation was quite expensive, so decrease the limits
3754   decrease_limits();
3755 }
3756 
3757 void CMTask::drain_local_queue(bool partially) {
3758   if (has_aborted()) return;
3759 
3760   // Decide what the target size is, depending whether we're going to
3761   // drain it partially (so that other tasks can steal if they run out
3762   // of things to do) or totally (at the very end).
3763   size_t target_size;
3764   if (partially) {
3765     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3766   } else {
3767     target_size = 0;
3768   }
3769 
3770   if (_task_queue->size() > target_size) {
3771     if (_cm->verbose_high()) {
3772       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3773                              _worker_id, target_size);
3774     }
3775 
3776     oop obj;
3777     bool ret = _task_queue->pop_local(obj);
3778     while (ret) {
3779       statsOnly( ++_local_pops );
3780 
3781       if (_cm->verbose_high()) {
3782         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3783                                p2i((void*) obj));
3784       }
3785 
3786       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3787       assert(!_g1h->is_on_master_free_list(
3788                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3789 
3790       scan_object(obj);
3791 
3792       if (_task_queue->size() <= target_size || has_aborted()) {
3793         ret = false;
3794       } else {
3795         ret = _task_queue->pop_local(obj);
3796       }
3797     }
3798 
3799     if (_cm->verbose_high()) {
3800       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3801                              _worker_id, _task_queue->size());
3802     }
3803   }
3804 }
3805 
3806 void CMTask::drain_global_stack(bool partially) {
3807   if (has_aborted()) return;
3808 
3809   // We have a policy to drain the local queue before we attempt to
3810   // drain the global stack.
3811   assert(partially || _task_queue->size() == 0, "invariant");
3812 
3813   // Decide what the target size is, depending whether we're going to
3814   // drain it partially (so that other tasks can steal if they run out
3815   // of things to do) or totally (at the very end).  Notice that,
3816   // because we move entries from the global stack in chunks or
3817   // because another task might be doing the same, we might in fact
3818   // drop below the target. But, this is not a problem.
3819   size_t target_size;
3820   if (partially) {
3821     target_size = _cm->partial_mark_stack_size_target();
3822   } else {
3823     target_size = 0;
3824   }
3825 
3826   if (_cm->mark_stack_size() > target_size) {
3827     if (_cm->verbose_low()) {
3828       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3829                              _worker_id, target_size);
3830     }
3831 
3832     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3833       get_entries_from_global_stack();
3834       drain_local_queue(partially);
3835     }
3836 
3837     if (_cm->verbose_low()) {
3838       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3839                              _worker_id, _cm->mark_stack_size());
3840     }
3841   }
3842 }
3843 
3844 // The SATB queue set makes several assumptions about whether to call the par
3845 // or non-par versions of its methods. This is why some of the code is
3846 // replicated. We should really get rid of the single-threaded version
3847 // of the code to simplify things.
3848 void CMTask::drain_satb_buffers() {
3849   if (has_aborted()) return;
3850 
3851   // We set this so that the regular clock knows that we're in the
3852   // middle of draining buffers and doesn't set the abort flag when it
3853   // notices that SATB buffers are available for draining. It'd be
3854   // very counterproductive if it did that. :-)
3855   _draining_satb_buffers = true;
3856 
3857   CMObjectClosure oc(this);
3858   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3859   satb_mq_set.set_closure(_worker_id, &oc);
3860 
3861   // This keeps claiming and applying the closure to completed buffers
3862   // until we run out of buffers or we need to abort.
3863   while (!has_aborted() &&
3864          satb_mq_set.apply_closure_to_completed_buffer(_worker_id)) {
3865     if (_cm->verbose_medium()) {
3866       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3867     }
3868     statsOnly( ++_satb_buffers_processed );
3869     regular_clock_call();
3870   }
3871 
3872   _draining_satb_buffers = false;
3873 
3874   assert(has_aborted() ||
3875          concurrent() ||
3876          satb_mq_set.completed_buffers_num() == 0, "invariant");
3877 
3878   satb_mq_set.set_closure(_worker_id, NULL);
3879 
3880   // again, this was a potentially expensive operation, decrease the
3881   // limits to get the regular clock call early
3882   decrease_limits();
3883 }
3884 
3885 void CMTask::print_stats() {
3886   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3887                          _worker_id, _calls);
3888   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3889                          _elapsed_time_ms, _termination_time_ms);
3890   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3891                          _step_times_ms.num(), _step_times_ms.avg(),
3892                          _step_times_ms.sd());
3893   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3894                          _step_times_ms.maximum(), _step_times_ms.sum());
3895 
3896 #if _MARKING_STATS_
3897   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3898                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3899                          _all_clock_intervals_ms.sd());
3900   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3901                          _all_clock_intervals_ms.maximum(),
3902                          _all_clock_intervals_ms.sum());
3903   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3904                          _clock_due_to_scanning, _clock_due_to_marking);
3905   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3906                          _objs_scanned, _objs_found_on_bitmap);
3907   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3908                          _local_pushes, _local_pops, _local_max_size);
3909   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3910                          _global_pushes, _global_pops, _global_max_size);
3911   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3912                          _global_transfers_to, _global_transfers_from);
3913   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
3914   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3915   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3916                          _steal_attempts, _steals);
3917   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3918   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3919                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3920   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3921                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3922 #endif // _MARKING_STATS_
3923 }
3924 
3925 /*****************************************************************************
3926 
3927     The do_marking_step(time_target_ms, ...) method is the building
3928     block of the parallel marking framework. It can be called in parallel
3929     with other invocations of do_marking_step() on different tasks
3930     (but only one per task, obviously) and concurrently with the
3931     mutator threads, or during remark, hence it eliminates the need
3932     for two versions of the code. When called during remark, it will
3933     pick up from where the task left off during the concurrent marking
3934     phase. Interestingly, tasks are also claimable during evacuation
3935     pauses, since do_marking_step() ensures that it aborts before
3936     it needs to yield.
3937 
3938     The data structures that it uses to do marking work are the
3939     following:
3940 
3941       (1) Marking Bitmap. If there are gray objects that appear only
3942       on the bitmap (this happens either when dealing with an overflow
3943       or when the initial marking phase has simply marked the roots
3944       and didn't push them on the stack), then tasks claim heap
3945       regions whose bitmap they then scan to find gray objects. A
3946       global finger indicates where the end of the last claimed region
3947       is. A local finger indicates how far into the region a task has
3948       scanned. The two fingers are used to determine how to gray an
3949       object (i.e. whether simply marking it is OK, as it will be
3950       visited by a task in the future, or whether it needs to be also
3951       pushed on a stack).
3952 
3953       (2) Local Queue. The local queue of the task which is accessed
3954       reasonably efficiently by the task. Other tasks can steal from
3955       it when they run out of work. Throughout the marking phase, a
3956       task attempts to keep its local queue short but not totally
3957       empty, so that entries are available for stealing by other
3958       tasks. Only when there is no more work will a task totally
3959       drain its local queue.
3960 
3961       (3) Global Mark Stack. This handles local queue overflow. During
3962       marking only sets of entries are moved between it and the local
3963       queues, as access to it requires a mutex and more fine-grained
3964       interaction with it might cause contention. If it
3965       overflows, then the marking phase should restart and iterate
3966       over the bitmap to identify gray objects. Throughout the marking
3967       phase, tasks attempt to keep the global mark stack at a small
3968       length but not totally empty, so that entries are available for
3969       popping by other tasks. Only when there is no more work will
3970       tasks totally drain the global mark stack.
3971 
3972       (4) SATB Buffer Queue. This is where completed SATB buffers are
3973       made available. Buffers are regularly removed from this queue
3974       and scanned for roots, so that the queue doesn't get too
3975       long. During remark, all completed buffers are processed, as
3976       well as the filled in parts of any uncompleted buffers.
3977 
3978     The do_marking_step() method tries to abort when the time target
3979     has been reached. There are a few other cases when the
3980     do_marking_step() method also aborts:
3981 
3982       (1) When the marking phase has been aborted (after a Full GC).
3983 
3984       (2) When a global overflow (on the global stack) has been
3985       triggered. Before the task aborts, it will actually sync up with
3986       the other tasks to ensure that all the marking data structures
3987       (local queues, stacks, fingers etc.)  are re-initialized so that
3988       when do_marking_step() completes, the marking phase can
3989       immediately restart.
3990 
3991       (3) When enough completed SATB buffers are available. The
3992       do_marking_step() method only tries to drain SATB buffers right
3993       at the beginning. So, if enough buffers are available, the
3994       marking step aborts and the SATB buffers are processed at
3995       the beginning of the next invocation.
3996 
3997       (4) To yield. When we have to yield, we abort and yield
3998       right at the end of do_marking_step(). This saves us a lot
3999       of hassle as, by yielding, we might allow a Full GC. If this
4000       happens then objects will be compacted underneath our feet, the
4001       heap might shrink, etc. We save checking for this by just
4002       aborting and doing the yield right at the end.
4003 
4004     From the above it follows that the do_marking_step() method should
4005     be called in a loop (or, otherwise, regularly) until it completes.
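
     As a rough, illustrative sketch only (the driver loop and the exact
     flag values below are an assumption for illustration, not the actual
     caller code), that loop might look like:

       do {
         task->do_marking_step(target_ms,
                               true,    // do_termination
                               false);  // is_serial
         // deal with yielding and overflow here as needed
       } while (task->has_aborted() && !cm->has_aborted());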
4006 
4007     If a marking step completes without its has_aborted() flag being
4008     true, it means it has completed the current marking phase (and
4009     also all other marking tasks have done so and have all synced up).
4010 
4011     A method called regular_clock_call() is invoked "regularly" (in
4012     sub ms intervals) throughout marking. It is this clock method that
4013     checks all the abort conditions which were mentioned above and
4014     decides when the task should abort. A work-based scheme is used to
4015     trigger this clock method: when the number of object words the
4016     marking phase has scanned or the number of references the marking
4017     phase has visited reach a given limit. Additional invocations of
4018     the clock method have been planted in a few other strategic places
4019     too. The initial reason for the clock method was to avoid calling
4020     vtime too regularly, as it is quite expensive. So, once it was in
4021     place, it was natural to piggy-back all the other conditions on it
4022     too and not constantly check them throughout the code.
4023 
4024     If do_termination is true then do_marking_step will enter its
4025     termination protocol.
4026 
4027     The value of is_serial must be true when do_marking_step is being
4028     called serially (i.e. by the VMThread) and do_marking_step should
4029     skip any synchronization in the termination and overflow code.
4030     Examples include the serial remark code and the serial reference
4031     processing closures.
4032 
4033     The value of is_serial must be false when do_marking_step is
4034     being called by any of the worker threads in a work gang.
4035     Examples include the concurrent marking code (CMMarkingTask),
4036     the MT remark code, and the MT reference processing closures.
4037 
4038  *****************************************************************************/
4039 
4040 void CMTask::do_marking_step(double time_target_ms,
4041                              bool do_termination,
4042                              bool is_serial) {
4043   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4044   assert(concurrent() == _cm->concurrent(), "they should be the same");
4045 
4046   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4047   assert(_task_queues != NULL, "invariant");
4048   assert(_task_queue != NULL, "invariant");
4049   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4050 
4051   assert(!_claimed,
4052          "only one thread should claim this task at any one time");
4053 
4054   // OK, this doesn't safeguard against all possible scenarios, as it is
4055   // possible for two threads to set the _claimed flag at the same
4056   // time. But it is only for debugging purposes anyway and it will
4057   // catch most problems.
4058   _claimed = true;
4059 
4060   _start_time_ms = os::elapsedVTime() * 1000.0;
4061   statsOnly( _interval_start_time_ms = _start_time_ms );
4062 
4063   // If do_stealing is true then do_marking_step will attempt to
4064   // steal work from the other CMTasks. It only makes sense to
4065   // enable stealing when the termination protocol is enabled
4066   // and do_marking_step() is not being called serially.
4067   bool do_stealing = do_termination && !is_serial;
4068 
4069   double diff_prediction_ms =
4070     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4071   _time_target_ms = time_target_ms - diff_prediction_ms;
4072 
4073   // set up the variables that are used in the work-based scheme to
4074   // call the regular clock method
4075   _words_scanned = 0;
4076   _refs_reached  = 0;
4077   recalculate_limits();
4078 
4079   // clear all flags
4080   clear_has_aborted();
4081   _has_timed_out = false;
4082   _draining_satb_buffers = false;
4083 
4084   ++_calls;
4085 
4086   if (_cm->verbose_low()) {
4087     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4088                            "target = %1.2lfms >>>>>>>>>>",
4089                            _worker_id, _calls, _time_target_ms);
4090   }
4091 
4092   // Set up the bitmap and oop closures. Anything that uses them is
4093   // eventually called from this method, so it is OK to allocate these
4094   // statically.
4095   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4096   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4097   set_cm_oop_closure(&cm_oop_closure);
4098 
4099   if (_cm->has_overflown()) {
4100     // This can happen if the mark stack overflows during a GC pause
4101     // and this task, after a yield point, restarts. We have to abort
4102     // as we need to get into the overflow protocol which happens
4103     // right at the end of this task.
4104     set_has_aborted();
4105   }
4106 
4107   // First drain any available SATB buffers. After this, we will not
4108   // look at SATB buffers before the next invocation of this method.
4109   // If enough completed SATB buffers are queued up, the regular clock
4110   // will abort this task so that it restarts.
4111   drain_satb_buffers();
4112   // ...then partially drain the local queue and the global stack
4113   drain_local_queue(true);
4114   drain_global_stack(true);
4115 
4116   do {
4117     if (!has_aborted() && _curr_region != NULL) {
4118       // This means that we're already holding on to a region.
4119       assert(_finger != NULL, "if region is not NULL, then the finger "
4120              "should not be NULL either");
4121 
4122       // We might have restarted this task after an evacuation pause
4123       // which might have evacuated the region we're holding on to
4124       // underneath our feet. Let's read its limit again to make sure
4125       // that we do not iterate over a region of the heap that
4126       // contains garbage (update_region_limit() will also move
4127       // _finger to the start of the region if it is found empty).
4128       update_region_limit();
4129       // We will start from _finger not from the start of the region,
4130       // as we might be restarting this task after aborting half-way
4131       // through scanning this region. In this case, _finger points to
4132       // the address where we last found a marked object. If this is a
4133       // fresh region, _finger points to start().
4134       MemRegion mr = MemRegion(_finger, _region_limit);
4135 
4136       if (_cm->verbose_low()) {
4137         gclog_or_tty->print_cr("[%u] we're scanning part "
4138                                "["PTR_FORMAT", "PTR_FORMAT") "
4139                                "of region "HR_FORMAT,
4140                                _worker_id, p2i(_finger), p2i(_region_limit),
4141                                HR_FORMAT_PARAMS(_curr_region));
4142       }
4143 
4144       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
4145              "humongous regions should go around loop once only");
4146 
4147       // Some special cases:
4148       // If the memory region is empty, we can just give up the region.
4149       // If the current region is humongous then we only need to check
4150       // the bitmap for the bit associated with the start of the object,
4151       // scan the object if it's live, and give up the region.
4152       // Otherwise, let's iterate over the bitmap of the part of the region
4153       // that is left.
4154       // If the iteration is successful, give up the region.
4155       if (mr.is_empty()) {
4156         giveup_current_region();
4157         regular_clock_call();
4158       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
4159         if (_nextMarkBitMap->isMarked(mr.start())) {
4160           // The object is marked - apply the closure
4161           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4162           bitmap_closure.do_bit(offset);
4163         }
4164         // Even if this task aborted while scanning the humongous object
4165         // we can (and should) give up the current region.
4166         giveup_current_region();
4167         regular_clock_call();
4168       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4169         giveup_current_region();
4170         regular_clock_call();
4171       } else {
4172         assert(has_aborted(), "currently the only way to do so");
4173         // The only way to abort the bitmap iteration is to return
4174         // false from the do_bit() method. However, inside the
4175         // do_bit() method we move the _finger to point to the
4176         // object currently being looked at. So, if we bail out, we
4177         // have definitely set _finger to something non-null.
4178         assert(_finger != NULL, "invariant");
4179 
4180         // Region iteration was actually aborted. So now _finger
4181         // points to the address of the object we last scanned. If we
4182         // leave it there, when we restart this task, we will rescan
4183         // the object. It is easy to avoid this. We move the finger by
4184         // enough to point to the next possible object header (the
4185         // bitmap knows by how much we need to move it as it knows its
4186         // granularity).
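             // (Illustration, assuming a granularity of one bit per heap
             // word: the finger then advances by a single word, the closest
             // address at which a different object header could be marked,
             // so on restart we do not rescan the object we just finished.)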
4187         assert(_finger < _region_limit, "invariant");
4188         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4189         // Check if bitmap iteration was aborted while scanning the last object
4190         if (new_finger >= _region_limit) {
4191           giveup_current_region();
4192         } else {
4193           move_finger_to(new_finger);
4194         }
4195       }
4196     }
4197     // At this point we have either completed iterating over the
4198     // region we were holding on to, or we have aborted.
4199 
4200     // We then partially drain the local queue and the global stack.
4201     // (Do we really need this?)
4202     drain_local_queue(true);
4203     drain_global_stack(true);
4204 
4205     // Read the note on the claim_region() method on why it might
4206     // return NULL with potentially more regions available for
4207     // claiming and why we have to check out_of_regions() to determine
4208     // whether we're done or not.
4209     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4210       // We are going to try to claim a new region. We should have
4211       // given up on the previous one.
4212       // Separated the asserts so that we know which one fires.
4213       assert(_curr_region  == NULL, "invariant");
4214       assert(_finger       == NULL, "invariant");
4215       assert(_region_limit == NULL, "invariant");
4216       if (_cm->verbose_low()) {
4217         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4218       }
4219       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4220       if (claimed_region != NULL) {
4221         // Yes, we managed to claim one
4222         statsOnly( ++_regions_claimed );
4223 
4224         if (_cm->verbose_low()) {
4225           gclog_or_tty->print_cr("[%u] we successfully claimed "
4226                                  "region "PTR_FORMAT,
4227                                  _worker_id, p2i(claimed_region));
4228         }
4229 
4230         setup_for_region(claimed_region);
4231         assert(_curr_region == claimed_region, "invariant");
4232       }
4233       // It is important to call the regular clock here. It might take
4234       // a while to claim a region if, for example, we hit a large
4235       // block of empty regions. So we need to call the regular clock
4236       // method once round the loop to make sure it's called
4237       // frequently enough.
4238       regular_clock_call();
4239     }
4240 
4241     if (!has_aborted() && _curr_region == NULL) {
4242       assert(_cm->out_of_regions(),
4243              "at this point we should be out of regions");
4244     }
4245   } while ( _curr_region != NULL && !has_aborted());
4246 
4247   if (!has_aborted()) {
4248     // We cannot check whether the global stack is empty, since other
4249     // tasks might be pushing objects to it concurrently.
4250     assert(_cm->out_of_regions(),
4251            "at this point we should be out of regions");
4252 
4253     if (_cm->verbose_low()) {
4254       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4255     }
4256 
4257     // Try to reduce the number of available SATB buffers so that
4258     // remark has less work to do.
4259     drain_satb_buffers();
4260   }
4261 
4262   // Since we've done everything else, we can now totally drain the
4263   // local queue and global stack.
4264   drain_local_queue(false);
4265   drain_global_stack(false);
4266 
4267   // Attempt to steal work from other tasks' queues.
4268   if (do_stealing && !has_aborted()) {
4269     // We have not aborted. This means that we have finished all that
4270     // we could. Let's try to do some stealing...
4271 
4272     // We cannot check whether the global stack is empty, since other
4273     // tasks might be pushing objects to it concurrently.
4274     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4275            "only way to reach here");
4276 
4277     if (_cm->verbose_low()) {
4278       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4279     }
4280 
4281     while (!has_aborted()) {
4282       oop obj;
4283       statsOnly( ++_steal_attempts );
4284 
4285       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4286         if (_cm->verbose_medium()) {
4287           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4288                                  _worker_id, p2i((void*) obj));
4289         }
4290 
4291         statsOnly( ++_steals );
4292 
4293         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4294                "any stolen object should be marked");
4295         scan_object(obj);
4296 
4297         // And since we're towards the end, let's totally drain the
4298         // local queue and global stack.
4299         drain_local_queue(false);
4300         drain_global_stack(false);
4301       } else {
4302         break;
4303       }
4304     }
4305   }
4306 
4307   // If we are about to wrap up and go into termination, check if we
4308   // should raise the overflow flag.
4309   if (do_termination && !has_aborted()) {
4310     if (_cm->force_overflow()->should_force()) {
4311       _cm->set_has_overflown();
4312       regular_clock_call();
4313     }
4314   }
4315 
4316   // We still haven't aborted. Now, let's try to get into the
4317   // termination protocol.
4318   if (do_termination && !has_aborted()) {
4319     // We cannot check whether the global stack is empty, since other
4320     // tasks might be concurrently pushing objects on it.
4321     // Separated the asserts so that we know which one fires.
4322     assert(_cm->out_of_regions(), "only way to reach here");
4323     assert(_task_queue->size() == 0, "only way to reach here");
4324 
4325     if (_cm->verbose_low()) {
4326       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4327     }
4328 
4329     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4330 
4331     // The CMTask class also extends the TerminatorTerminator class,
4332     // hence its should_exit_termination() method will also decide
4333     // whether to exit the termination protocol or not.
4334     bool finished = (is_serial ||
4335                      _cm->terminator()->offer_termination(this));
4336     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4337     _termination_time_ms +=
4338       termination_end_time_ms - _termination_start_time_ms;
4339 
4340     if (finished) {
4341       // We're all done.
4342 
4343       if (_worker_id == 0) {
4344         // let's allow task 0 to do this
4345         if (concurrent()) {
4346           assert(_cm->concurrent_marking_in_progress(), "invariant");
4347           // we need to set this to false before the next
4348           // safepoint. This way we ensure that the marking phase
4349           // doesn't observe any more heap expansions.
4350           _cm->clear_concurrent_marking_in_progress();
4351         }
4352       }
4353 
4354       // We can now guarantee that the global stack is empty, since
4355       // all other tasks have finished. We separated the guarantees so
4356       // that, if a condition is false, we can immediately find out
4357       // which one.
4358       guarantee(_cm->out_of_regions(), "only way to reach here");
4359       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4360       guarantee(_task_queue->size() == 0, "only way to reach here");
4361       guarantee(!_cm->has_overflown(), "only way to reach here");
4362       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4363 
4364       if (_cm->verbose_low()) {
4365         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4366       }
4367     } else {
4368       // Apparently there's more work to do. Let's abort this task. The
4369       // caller will restart it and we can hopefully find more things to do.
4370 
4371       if (_cm->verbose_low()) {
4372         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4373                                _worker_id);
4374       }
4375 
4376       set_has_aborted();
4377       statsOnly( ++_aborted_termination );
4378     }
4379   }
4380 
4381   // Mainly for debugging purposes to make sure that a pointer to the
4382   // closure which was stack-allocated in this frame doesn't
4383   // escape it by accident.
4384   set_cm_oop_closure(NULL);
4385   double end_time_ms = os::elapsedVTime() * 1000.0;
4386   double elapsed_time_ms = end_time_ms - _start_time_ms;
4387   // Update the step history.
4388   _step_times_ms.add(elapsed_time_ms);
4389 
4390   if (has_aborted()) {
4391     // The task was aborted for some reason.
4392 
4393     statsOnly( ++_aborted );
4394 
4395     if (_has_timed_out) {
4396       double diff_ms = elapsed_time_ms - _time_target_ms;
4397       // Keep statistics of how well we did with respect to hitting
4398       // our target only if we actually timed out (if we aborted for
4399       // other reasons, then the results might get skewed).
4400       _marking_step_diffs_ms.add(diff_ms);
4401     }
4402 
4403     if (_cm->has_overflown()) {
4404       // This is the interesting one. We aborted because a global
4405       // overflow was raised. This means we have to restart the
4406       // marking phase and start iterating over regions. However, in
4407       // order to do this we have to make sure that all tasks stop
4408       // what they are doing and re-initialize in a safe manner. We
4409       // will achieve this with the use of two barrier sync points.
4410 
4411       if (_cm->verbose_low()) {
4412         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4413       }
4414 
4415       if (!is_serial) {
4416         // We only need to enter the sync barrier if being called
4417         // from a parallel context
4418         _cm->enter_first_sync_barrier(_worker_id);
4419 
4420         // When we exit this sync barrier we know that all tasks have
4421         // stopped doing marking work. So, it's now safe to
4422         // re-initialize our data structures. At the end of this method,
4423         // task 0 will clear the global data structures.
4424       }
4425 
4426       statsOnly( ++_aborted_overflow );
4427 
4428       // We clear the local state of this task...
4429       clear_region_fields();
4430 
4431       if (!is_serial) {
4432         // ...and enter the second barrier.
4433         _cm->enter_second_sync_barrier(_worker_id);
4434       }
4435       // At this point, if we're in the concurrent phase of
4436       // marking, everything has been re-initialized and we're
4437       // ready to restart.
4438     }
4439 
4440     if (_cm->verbose_low()) {
4441       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4442                              "elapsed = %1.2lfms <<<<<<<<<<",
4443                              _worker_id, _time_target_ms, elapsed_time_ms);
4444       if (_cm->has_aborted()) {
4445         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4446                                _worker_id);
4447       }
4448     }
4449   } else {
4450     if (_cm->verbose_low()) {
4451       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4452                              "elapsed = %1.2lfms <<<<<<<<<<",
4453                              _worker_id, _time_target_ms, elapsed_time_ms);
4454     }
4455   }
4456 
4457   _claimed = false;
4458 }
4459 
4460 CMTask::CMTask(uint worker_id,
4461                ConcurrentMark* cm,
4462                size_t* marked_bytes,
4463                BitMap* card_bm,
4464                CMTaskQueue* task_queue,
4465                CMTaskQueueSet* task_queues)
4466   : _g1h(G1CollectedHeap::heap()),
4467     _worker_id(worker_id), _cm(cm),
4468     _claimed(false),
4469     _nextMarkBitMap(NULL), _hash_seed(17),
4470     _task_queue(task_queue),
4471     _task_queues(task_queues),
4472     _cm_oop_closure(NULL),
4473     _marked_bytes_array(marked_bytes),
4474     _card_bm(card_bm) {
4475   guarantee(task_queue != NULL, "invariant");
4476   guarantee(task_queues != NULL, "invariant");
4477 
4478   statsOnly( _clock_due_to_scanning = 0;
4479              _clock_due_to_marking  = 0 );
4480 
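       // Seed the step-time diff history with a small non-zero value,
       // presumably so that the very first prediction yields a conservative
       // adjustment rather than zero.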
4481   _marking_step_diffs_ms.add(0.5);
4482 }
4483 
4484 // These are formatting macros that are used below to ensure
4485 // consistent formatting. The *_H_* versions are used to format the
4486 // header for a particular value and they should be kept consistent
4487 // with the corresponding macro. Also note that most of the macros add
4488 // the necessary white space (as a prefix) which makes them a bit
4489 // easier to compose.
4490 
4491 // All the output lines are prefixed with this string to be able to
4492 // identify them easily in a large log file.
4493 #define G1PPRL_LINE_PREFIX            "###"
4494 
4495 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4496 #ifdef _LP64
4497 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4498 #else // _LP64
4499 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4500 #endif // _LP64
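     // (Arithmetic behind the header widths above: on 64-bit PTR_FORMAT
     // prints "0x" plus 16 zero-padded hex digits, i.e. 18 characters, so a
     // "start-end" range needs 2*18+1 = 37 columns; on 32-bit it is "0x"
     // plus 8 hex digits, i.e. 10 characters, hence 2*10+1 = 21 columns.)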
4501 
4502 // For per-region info
4503 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4504 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4505 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4506 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4507 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4508 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4509 
4510 // For summary info
4511 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4512 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4513 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4514 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
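     // Put together, a per-region line (see doHeapRegion() below) comes out
     // roughly as follows, with illustrative values rather than verbatim
     // output:
     //   ###  OLD 0x00000000f0000000-0x00000000f0100000  1048576  524288  262144  12.3  2048  0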
4515 
4516 G1PrintRegionLivenessInfoClosure::
4517 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4518   : _out(out),
4519     _total_used_bytes(0), _total_capacity_bytes(0),
4520     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4521     _hum_used_bytes(0), _hum_capacity_bytes(0),
4522     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4523     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4524   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4525   MemRegion g1_reserved = g1h->g1_reserved();
4526   double now = os::elapsedTime();
4527 
4528   // Print the header of the output.
4529   _out->cr();
4530   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4531   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4532                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4533                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4534                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4535                  HeapRegion::GrainBytes);
4536   _out->print_cr(G1PPRL_LINE_PREFIX);
4537   _out->print_cr(G1PPRL_LINE_PREFIX
4538                 G1PPRL_TYPE_H_FORMAT
4539                 G1PPRL_ADDR_BASE_H_FORMAT
4540                 G1PPRL_BYTE_H_FORMAT
4541                 G1PPRL_BYTE_H_FORMAT
4542                 G1PPRL_BYTE_H_FORMAT
4543                 G1PPRL_DOUBLE_H_FORMAT
4544                 G1PPRL_BYTE_H_FORMAT
4545                 G1PPRL_BYTE_H_FORMAT,
4546                 "type", "address-range",
4547                 "used", "prev-live", "next-live", "gc-eff",
4548                 "remset", "code-roots");
4549   _out->print_cr(G1PPRL_LINE_PREFIX
4550                 G1PPRL_TYPE_H_FORMAT
4551                 G1PPRL_ADDR_BASE_H_FORMAT
4552                 G1PPRL_BYTE_H_FORMAT
4553                 G1PPRL_BYTE_H_FORMAT
4554                 G1PPRL_BYTE_H_FORMAT
4555                 G1PPRL_DOUBLE_H_FORMAT
4556                 G1PPRL_BYTE_H_FORMAT
4557                 G1PPRL_BYTE_H_FORMAT,
4558                 "", "",
4559                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4560                 "(bytes)", "(bytes)");
4561 }
4562 
4563 // It takes a pointer to one of the _hum_* fields, deduces the
4564 // corresponding value for a region in a humongous region series
4565 // (either the region size, or what's left if the _hum_* field is
4566 // < the region size), and updates the _hum_* field accordingly.
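     // A worked example with illustrative numbers: with a 1 MB region size
     // and *hum_bytes == 2.5 MB, three successive calls return 1 MB, 1 MB
     // and 0.5 MB respectively, leaving *hum_bytes == 0.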
4567 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4568   size_t bytes = 0;
4569   // The > 0 check is to deal with the prev and next live bytes which
4570   // could be 0.
4571   if (*hum_bytes > 0) {
4572     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4573     *hum_bytes -= bytes;
4574   }
4575   return bytes;
4576 }
4577 
4578 // It deduces the values for a region in a humongous region series
4579 // from the _hum_* fields and updates those accordingly. It assumes
4580 // that the _hum_* fields have already been set up from the "starts
4581 // humongous" region and that we visit the regions in address order.
4582 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4583                                                      size_t* capacity_bytes,
4584                                                      size_t* prev_live_bytes,
4585                                                      size_t* next_live_bytes) {
4586   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4587   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4588   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4589   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4590   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4591 }
4592 
4593 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4594   const char* type       = r->get_type_str();
4595   HeapWord* bottom       = r->bottom();
4596   HeapWord* end          = r->end();
4597   size_t capacity_bytes  = r->capacity();
4598   size_t used_bytes      = r->used();
4599   size_t prev_live_bytes = r->live_bytes();
4600   size_t next_live_bytes = r->next_live_bytes();
4601   double gc_eff          = r->gc_efficiency();
4602   size_t remset_bytes    = r->rem_set()->mem_size();
4603   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4604 
4605   if (r->is_starts_humongous()) {
4606     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4607            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4608            "they should have been zeroed after the last time we used them");
4609     // Set up the _hum_* fields.
4610     _hum_capacity_bytes  = capacity_bytes;
4611     _hum_used_bytes      = used_bytes;
4612     _hum_prev_live_bytes = prev_live_bytes;
4613     _hum_next_live_bytes = next_live_bytes;
4614     get_hum_bytes(&used_bytes, &capacity_bytes,
4615                   &prev_live_bytes, &next_live_bytes);
4616     end = bottom + HeapRegion::GrainWords;
4617   } else if (r->is_continues_humongous()) {
4618     get_hum_bytes(&used_bytes, &capacity_bytes,
4619                   &prev_live_bytes, &next_live_bytes);
4620     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4621   }
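       // Humongous accounting: the "starts humongous" region caches the
       // totals for the whole series in the _hum_* fields, and each region
       // in the series (starts and continues) then takes at most one
       // region's worth from them, so the per-region lines and the running
       // totals stay consistent.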
4622 
4623   _total_used_bytes      += used_bytes;
4624   _total_capacity_bytes  += capacity_bytes;
4625   _total_prev_live_bytes += prev_live_bytes;
4626   _total_next_live_bytes += next_live_bytes;
4627   _total_remset_bytes    += remset_bytes;
4628   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4629 
4630   // Print a line for this particular region.
4631   _out->print_cr(G1PPRL_LINE_PREFIX
4632                  G1PPRL_TYPE_FORMAT
4633                  G1PPRL_ADDR_BASE_FORMAT
4634                  G1PPRL_BYTE_FORMAT
4635                  G1PPRL_BYTE_FORMAT
4636                  G1PPRL_BYTE_FORMAT
4637                  G1PPRL_DOUBLE_FORMAT
4638                  G1PPRL_BYTE_FORMAT
4639                  G1PPRL_BYTE_FORMAT,
4640                  type, p2i(bottom), p2i(end),
4641                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4642                  remset_bytes, strong_code_roots_bytes);
4643 
4644   return false;
4645 }
4646 
4647 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4648   // Add the static memory usage to the remembered set sizes.
4649   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4650   // Print the footer of the output.
4651   _out->print_cr(G1PPRL_LINE_PREFIX);
4652   _out->print_cr(G1PPRL_LINE_PREFIX
4653                  " SUMMARY"
4654                  G1PPRL_SUM_MB_FORMAT("capacity")
4655                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4656                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4657                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4658                  G1PPRL_SUM_MB_FORMAT("remset")
4659                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4660                  bytes_to_mb(_total_capacity_bytes),
4661                  bytes_to_mb(_total_used_bytes),
4662                  perc(_total_used_bytes, _total_capacity_bytes),
4663                  bytes_to_mb(_total_prev_live_bytes),
4664                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4665                  bytes_to_mb(_total_next_live_bytes),
4666                  perc(_total_next_live_bytes, _total_capacity_bytes),
4667                  bytes_to_mb(_total_remset_bytes),
4668                  bytes_to_mb(_total_strong_code_roots_bytes));
4669   _out->cr();
4670 }