1 /*
   2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1Log.hpp"
  33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  34 #include "gc_implementation/g1/g1RemSet.hpp"
  35 #include "gc_implementation/g1/heapRegion.inline.hpp"
  36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  38 #include "gc_implementation/shared/vmGCOperations.hpp"
  39 #include "gc_implementation/shared/gcTimer.hpp"
  40 #include "gc_implementation/shared/gcTrace.hpp"
  41 #include "gc_implementation/shared/gcTraceTime.hpp"
  42 #include "memory/genOopClosures.inline.hpp"
  43 #include "memory/referencePolicy.hpp"
  44 #include "memory/resourceArea.hpp"
  45 #include "oops/oop.inline.hpp"
  46 #include "runtime/handles.inline.hpp"
  47 #include "runtime/java.hpp"
  48 #include "runtime/prefetch.inline.hpp"
  49 #include "services/memTracker.hpp"
  50 
  51 // Concurrent marking bit map wrapper
  52 
  53 CMBitMapRO::CMBitMapRO(int shifter) :
  54   _bm(),
  55   _shifter(shifter) {
  56   _bmStartWord = 0;
  57   _bmWordSize = 0;
  58 }
  59 
  60 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  61                                                HeapWord* limit) const {
  62   // First we must round addr *up* to a possible object boundary.
  63   addr = (HeapWord*)align_size_up((intptr_t)addr,
  64                                   HeapWordSize << _shifter);
  65   size_t addrOffset = heapWordToOffset(addr);
  66   if (limit == NULL) {
  67     limit = _bmStartWord + _bmWordSize;
  68   }
  69   size_t limitOffset = heapWordToOffset(limit);
  70   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  71   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  72   assert(nextAddr >= addr, "get_next_one postcondition");
  73   assert(nextAddr == limit || isMarked(nextAddr),
  74          "get_next_one postcondition");
  75   return nextAddr;
  76 }
  77 
  78 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  79                                                  HeapWord* limit) const {
  80   size_t addrOffset = heapWordToOffset(addr);
  81   if (limit == NULL) {
  82     limit = _bmStartWord + _bmWordSize;
  83   }
  84   size_t limitOffset = heapWordToOffset(limit);
  85   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  86   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  87   assert(nextAddr >= addr, "get_next_one postcondition");
  88   assert(nextAddr == limit || !isMarked(nextAddr),
  89          "get_next_one postcondition");
  90   return nextAddr;
  91 }
  92 
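     // Converts a difference measured in HeapWords into the corresponding
     // difference in bitmap bit offsets, asserting that the difference is
     // aligned to this bitmap's granularity.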
  93 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  94   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  95   return (int) (diff >> _shifter);
  96 }
  97 
  98 #ifndef PRODUCT
  99 bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
 100   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 101   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 102          "size inconsistency");
 103   return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
 104          _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
 105 }
 106 #endif
 107 
 108 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 109   _bm.print_on_error(st, prefix);
 110 }
 111 
 112 bool CMBitMap::allocate(ReservedSpace heap_rs) {
 113   _bmStartWord = (HeapWord*)(heap_rs.base());
 114   _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
 115   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
 116                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
 117   if (!brs.is_reserved()) {
 118     warning("ConcurrentMark marking bit map allocation failure");
 119     return false;
 120   }
 121   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
 122   // For now we'll just commit all of the bit map up front.
 123   // Later on we'll try to be more parsimonious with swap.
 124   if (!_virtual_space.initialize(brs, brs.size())) {
 125     warning("ConcurrentMark marking bit map backing store failure");
 126     return false;
 127   }
 128   assert(_virtual_space.committed_size() == brs.size(),
 129          "didn't reserve backing store for all of concurrent marking bit map?");
 130   _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
 131   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
 132          _bmWordSize, "inconsistency in bit map sizing");
 133   _bm.set_size(_bmWordSize >> _shifter);
 134   return true;
 135 }
 136 
 137 void CMBitMap::clearAll() {
 138   _bm.clear();
 139   return;
 140 }
 141 
 142 void CMBitMap::markRange(MemRegion mr) {
 143   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 144   assert(!mr.is_empty(), "unexpected empty region");
 145   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 146           ((HeapWord *) mr.end())),
 147          "markRange memory region end is not card aligned");
 148   // convert address range into offset range
 149   _bm.at_put_range(heapWordToOffset(mr.start()),
 150                    heapWordToOffset(mr.end()), true);
 151 }
 152 
 153 void CMBitMap::clearRange(MemRegion mr) {
 154   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 155   assert(!mr.is_empty(), "unexpected empty region");
 156   // convert address range into offset range
 157   _bm.at_put_range(heapWordToOffset(mr.start()),
 158                    heapWordToOffset(mr.end()), false);
 159 }
 160 
 161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 162                                             HeapWord* end_addr) {
 163   HeapWord* start = getNextMarkedWordAddress(addr);
 164   start = MIN2(start, end_addr);
 165   HeapWord* end   = getNextUnmarkedWordAddress(start);
 166   end = MIN2(end, end_addr);
 167   assert(start <= end, "Consistency check");
 168   MemRegion mr(start, end);
 169   if (!mr.is_empty()) {
 170     clearRange(mr);
 171   }
 172   return mr;
 173 }
 174 
 175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 176   _base(NULL), _cm(cm)
 177 #ifdef ASSERT
 178   , _drain_in_progress(false)
 179   , _drain_in_progress_yields(false)
 180 #endif
 181 {}
 182 
 183 bool CMMarkStack::allocate(size_t capacity) {
 184   // allocate a stack of the requisite depth
 185   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 186   if (!rs.is_reserved()) {
 187     warning("ConcurrentMark MarkStack allocation failure");
 188     return false;
 189   }
 190   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 191   if (!_virtual_space.initialize(rs, rs.size())) {
 192     warning("ConcurrentMark MarkStack backing store failure");
 193     // Release the virtual memory reserved for the marking stack
 194     rs.release();
 195     return false;
 196   }
 197   assert(_virtual_space.committed_size() == rs.size(),
 198          "Didn't reserve backing store for all of ConcurrentMark stack?");
 199   _base = (oop*) _virtual_space.low();
 200   setEmpty();
 201   _capacity = (jint) capacity;
 202   _saved_index = -1;
 203   _should_expand = false;
 204   NOT_PRODUCT(_max_depth = 0);
 205   return true;
 206 }
 207 
 208 void CMMarkStack::expand() {
 209   // Called during remark if we've overflown the marking stack during marking.
 210   assert(isEmpty(), "stack should have been emptied while handling overflow");
 211   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 212   // Clear expansion flag
 213   _should_expand = false;
 214   if (_capacity == (jint) MarkStackSizeMax) {
 215     if (PrintGCDetails && Verbose) {
 216       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 217     }
 218     return;
 219   }
 220   // Double capacity if possible
 221   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 222   // Do not give up the existing stack until we have managed to
 223   // get the doubled capacity that we want.
 224   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 225                                                            sizeof(oop)));
 226   if (rs.is_reserved()) {
 227     // Release the backing store associated with old stack
 228     _virtual_space.release();
 229     // Reinitialize virtual space for new stack
 230     if (!_virtual_space.initialize(rs, rs.size())) {
 231       fatal("Not enough swap for expanded marking stack capacity");
 232     }
 233     _base = (oop*)(_virtual_space.low());
 234     _index = 0;
 235     _capacity = new_capacity;
 236   } else {
 237     if (PrintGCDetails && Verbose) {
 238       // Failed to double capacity, continue;
 239       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 240                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 241                           _capacity / K, new_capacity / K);
 242     }
 243   }
 244 }
 245 
 246 void CMMarkStack::set_should_expand() {
 247   // If we're resetting the marking state because of a
 248   // marking stack overflow, record that we should, if
 249   // possible, expand the stack.
 250   _should_expand = _cm->has_overflown();
 251 }
 252 
 253 CMMarkStack::~CMMarkStack() {
 254   if (_base != NULL) {
 255     _base = NULL;
 256     _virtual_space.release();
 257   }
 258 }
 259 
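     // Lock-free push of a single oop: claim a slot by advancing _index with a
     // CAS and retry on contention; if the stack is full, set _overflow instead.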
 260 void CMMarkStack::par_push(oop ptr) {
 261   while (true) {
 262     if (isFull()) {
 263       _overflow = true;
 264       return;
 265     }
 266     // Otherwise...
 267     jint index = _index;
 268     jint next_index = index+1;
 269     jint res = Atomic::cmpxchg(next_index, &_index, index);
 270     if (res == index) {
 271       _base[index] = ptr;
 272       // Note that we don't maintain this atomically.  We could, but it
 273       // doesn't seem necessary.
 274       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 275       return;
 276     }
 277     // Otherwise, we need to try again.
 278   }
 279 }
 280 
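     // Lock-free push of "n" oops from ptr_arr: claim "n" consecutive slots with
     // a single CAS on _index; if they do not fit, set _overflow and give up.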
 281 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 282   while (true) {
 283     if (isFull()) {
 284       _overflow = true;
 285       return;
 286     }
 287     // Otherwise...
 288     jint index = _index;
 289     jint next_index = index + n;
 290     if (next_index > _capacity) {
 291       _overflow = true;
 292       return;
 293     }
 294     jint res = Atomic::cmpxchg(next_index, &_index, index);
 295     if (res == index) {
 296       for (int i = 0; i < n; i++) {
 297         int  ind = index + i;
 298         assert(ind < _capacity, "By overflow test above.");
 299         _base[ind] = ptr_arr[i];
 300       }
 301       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 302       return;
 303     }
 304     // Otherwise, we need to try again.
 305   }
 306 }
 307 
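     // Push "n" oops from ptr_arr while holding the ParGCRareEvent_lock;
     // sets _overflow if the elements do not fit on the stack.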
 308 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 309   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 310   jint start = _index;
 311   jint next_index = start + n;
 312   if (next_index > _capacity) {
 313     _overflow = true;
 314     return;
 315   }
 316   // Otherwise.
 317   _index = next_index;
 318   for (int i = 0; i < n; i++) {
 319     int ind = start + i;
 320     assert(ind < _capacity, "By overflow test above.");
 321     _base[ind] = ptr_arr[i];
 322   }
 323   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 324 }
 325 
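     // Pop up to "max" oops into ptr_arr while holding the ParGCRareEvent_lock.
     // Returns false (with *n == 0) if the stack is empty, true otherwise.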
 326 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 327   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 328   jint index = _index;
 329   if (index == 0) {
 330     *n = 0;
 331     return false;
 332   } else {
 333     int k = MIN2(max, index);
 334     jint  new_ind = index - k;
 335     for (int j = 0; j < k; j++) {
 336       ptr_arr[j] = _base[new_ind + j];
 337     }
 338     _index = new_ind;
 339     *n = k;
 340     return true;
 341   }
 342 }
 343 
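     // Drain the stack: repeatedly pop an object, assert that it is grey
     // (marked on "bm" if a bitmap is supplied), and apply "cl" to it. If
     // "yield_after" is true, a yield request stops the drain early and
     // false is returned.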
 344 template<class OopClosureClass>
 345 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 346   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 347          || SafepointSynchronize::is_at_safepoint(),
 348          "Drain recursion must be yield-safe.");
 349   bool res = true;
 350   debug_only(_drain_in_progress = true);
 351   debug_only(_drain_in_progress_yields = yield_after);
 352   while (!isEmpty()) {
 353     oop newOop = pop();
 354     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 355     assert(newOop->is_oop(), "Expected an oop");
 356     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 357            "only grey objects on this stack");
 358     newOop->oop_iterate(cl);
 359     if (yield_after && _cm->do_yield_check()) {
 360       res = false;
 361       break;
 362     }
 363   }
 364   debug_only(_drain_in_progress = false);
 365   return res;
 366 }
 367 
 368 void CMMarkStack::note_start_of_gc() {
 369   assert(_saved_index == -1,
 370          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 371   _saved_index = _index;
 372 }
 373 
 374 void CMMarkStack::note_end_of_gc() {
 375   // This is intentionally a guarantee, instead of an assert. If we
 376   // accidentally add something to the mark stack during GC, it
 377   // will be a correctness issue so it's better if we crash. We'll
 378   // only check this once per GC anyway, so it won't be a performance
 379   // issue in any way.
 380   guarantee(_saved_index == _index,
 381             err_msg("saved index: %d index: %d", _saved_index, _index));
 382   _saved_index = -1;
 383 }
 384 
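     // Apply "f" to every oop currently on the mark stack. The assert below
     // checks that the stack has not changed since note_start_of_gc().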
 385 void CMMarkStack::oops_do(OopClosure* f) {
 386   assert(_saved_index == _index,
 387          err_msg("saved index: %d index: %d", _saved_index, _index));
 388   for (int i = 0; i < _index; i += 1) {
 389     f->do_oop(&_base[i]);
 390   }
 391 }
 392 
 393 bool ConcurrentMark::not_yet_marked(oop obj) const {
 394   return _g1h->is_obj_ill(obj);
 395 }
 396 
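     // Root region scanning: currently only survivor regions can be root
     // regions. They are handed out to marking workers via claim_next() and
     // completion is tracked using the RootRegionScan_lock.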
 397 CMRootRegions::CMRootRegions() :
 398   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 399   _should_abort(false),  _next_survivor(NULL) { }
 400 
 401 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 402   _young_list = g1h->young_list();
 403   _cm = cm;
 404 }
 405 
 406 void CMRootRegions::prepare_for_scan() {
 407   assert(!scan_in_progress(), "pre-condition");
 408 
 409   // Currently, only survivors can be root regions.
 410   assert(_next_survivor == NULL, "pre-condition");
 411   _next_survivor = _young_list->first_survivor_region();
 412   _scan_in_progress = (_next_survivor != NULL);
 413   _should_abort = false;
 414 }
 415 
 416 HeapRegion* CMRootRegions::claim_next() {
 417   if (_should_abort) {
 418     // If someone has set the should_abort flag, we return NULL to
 419     // force the caller to bail out of their loop.
 420     return NULL;
 421   }
 422 
 423   // Currently, only survivors can be root regions.
 424   HeapRegion* res = _next_survivor;
 425   if (res != NULL) {
 426     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 427     // Read it again in case it changed while we were waiting for the lock.
 428     res = _next_survivor;
 429     if (res != NULL) {
 430       if (res == _young_list->last_survivor_region()) {
 431         // We just claimed the last survivor so store NULL to indicate
 432         // that we're done.
 433         _next_survivor = NULL;
 434       } else {
 435         _next_survivor = res->get_next_young_region();
 436       }
 437     } else {
 438       // Someone else claimed the last survivor while we were trying
 439       // to take the lock so nothing else to do.
 440     }
 441   }
 442   assert(res == NULL || res->is_survivor(), "post-condition");
 443 
 444   return res;
 445 }
 446 
 447 void CMRootRegions::scan_finished() {
 448   assert(scan_in_progress(), "pre-condition");
 449 
 450   // Currently, only survivors can be root regions.
 451   if (!_should_abort) {
 452     assert(_next_survivor == NULL, "we should have claimed all survivors");
 453   }
 454   _next_survivor = NULL;
 455 
 456   {
 457     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 458     _scan_in_progress = false;
 459     RootRegionScan_lock->notify_all();
 460   }
 461 }
 462 
 463 bool CMRootRegions::wait_until_scan_finished() {
 464   if (!scan_in_progress()) return false;
 465 
 466   {
 467     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 468     while (scan_in_progress()) {
 469       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 470     }
 471   }
 472   return true;
 473 }
 474 
 475 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 476 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 477 #endif // _MSC_VER
 478 
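     // Heuristic: use roughly one concurrent marking thread for every four
     // parallel GC threads, and always at least one.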
 479 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 480   return MAX2((n_par_threads + 2) / 4, 1U);
 481 }
 482 
 483 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
 484   _g1h(g1h),
 485   _markBitMap1(log2_intptr(MinObjAlignment)),
 486   _markBitMap2(log2_intptr(MinObjAlignment)),
 487   _parallel_marking_threads(0),
 488   _max_parallel_marking_threads(0),
 489   _sleep_factor(0.0),
 490   _marking_task_overhead(1.0),
 491   _cleanup_sleep_factor(0.0),
 492   _cleanup_task_overhead(1.0),
 493   _cleanup_list("Cleanup List"),
 494   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 495   _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
 496             CardTableModRefBS::card_shift,
 497             false /* in_resource_area*/),
 498 
 499   _prevMarkBitMap(&_markBitMap1),
 500   _nextMarkBitMap(&_markBitMap2),
 501 
 502   _markStack(this),
 503   // _finger set in set_non_marking_state
 504 
 505   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
 506   // _active_tasks set in set_non_marking_state
 507   // _tasks set inside the constructor
 508   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 509   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 510 
 511   _has_overflown(false),
 512   _concurrent(false),
 513   _has_aborted(false),
 514   _aborted_gc_id(GCId::undefined()),
 515   _restart_for_overflow(false),
 516   _concurrent_marking_in_progress(false),
 517 
 518   // _verbose_level set below
 519 
 520   _init_times(),
 521   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 522   _cleanup_times(),
 523   _total_counting_time(0.0),
 524   _total_rs_scrub_time(0.0),
 525 
 526   _parallel_workers(NULL),
 527 
 528   _count_card_bitmaps(NULL),
 529   _count_marked_bytes(NULL),
 530   _completed_initialization(false) {
 531   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 532   if (verbose_level < no_verbose) {
 533     verbose_level = no_verbose;
 534   }
 535   if (verbose_level > high_verbose) {
 536     verbose_level = high_verbose;
 537   }
 538   _verbose_level = verbose_level;
 539 
 540   if (verbose_low()) {
 541     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 542                            "heap end = " INTPTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 543   }
 544 
 545   if (!_markBitMap1.allocate(heap_rs)) {
 546     warning("Failed to allocate first CM bit map");
 547     return;
 548   }
 549   if (!_markBitMap2.allocate(heap_rs)) {
 550     warning("Failed to allocate second CM bit map");
 551     return;
 552   }
 553 
 554   // Create & start a ConcurrentMark thread.
 555   _cmThread = new ConcurrentMarkThread(this);
 556   assert(cmThread() != NULL, "CM Thread should have been created");
 557   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 558   if (_cmThread->osthread() == NULL) {
 559       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 560   }
 561 
 562   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 563   assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
 564   assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
 565 
 566   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 567   satb_qs.set_buffer_size(G1SATBBufferSize);
 568 
 569   _root_regions.init(_g1h, this);
 570 
 571   if (ConcGCThreads > ParallelGCThreads) {
 572     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 573             "than ParallelGCThreads (" UINTX_FORMAT ").",
 574             ConcGCThreads, ParallelGCThreads);
 575     return;
 576   }
 577   if (ParallelGCThreads == 0) {
 578     // if we are not running with any parallel GC threads we will not
 579     // spawn any marking threads either
 580     _parallel_marking_threads =       0;
 581     _max_parallel_marking_threads =   0;
 582     _sleep_factor             =     0.0;
 583     _marking_task_overhead    =     1.0;
 584   } else {
 585     if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 586       // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 587       // if both are set
 588       _sleep_factor             = 0.0;
 589       _marking_task_overhead    = 1.0;
 590     } else if (G1MarkingOverheadPercent > 0) {
 591       // We will calculate the number of parallel marking threads based
 592       // on a target overhead with respect to the soft real-time goal
 593       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 594       double overall_cm_overhead =
 595         (double) MaxGCPauseMillis * marking_overhead /
 596         (double) GCPauseIntervalMillis;
 597       double cpu_ratio = 1.0 / (double) os::processor_count();
 598       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 599       double marking_task_overhead =
 600         overall_cm_overhead / marking_thread_num *
 601                                                 (double) os::processor_count();
 602       double sleep_factor =
 603                          (1.0 - marking_task_overhead) / marking_task_overhead;
 604 
 605       FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 606       _sleep_factor             = sleep_factor;
 607       _marking_task_overhead    = marking_task_overhead;
 608     } else {
 609       // Calculate the number of parallel marking threads by scaling
 610       // the number of parallel GC threads.
 611       uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 612       FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 613       _sleep_factor             = 0.0;
 614       _marking_task_overhead    = 1.0;
 615     }
 616 
 617     assert(ConcGCThreads > 0, "Should have been set");
 618     _parallel_marking_threads = (uint) ConcGCThreads;
 619     _max_parallel_marking_threads = _parallel_marking_threads;
 620 
 621     if (parallel_marking_threads() > 1) {
 622       _cleanup_task_overhead = 1.0;
 623     } else {
 624       _cleanup_task_overhead = marking_task_overhead();
 625     }
 626     _cleanup_sleep_factor =
 627                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 628 
 629 #if 0
 630     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 631     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 632     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 633     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 634     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 635 #endif
 636 
 637     guarantee(parallel_marking_threads() > 0, "peace of mind");
 638     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 639          _max_parallel_marking_threads, false, true);
 640     if (_parallel_workers == NULL) {
 641       vm_exit_during_initialization("Failed necessary allocation.");
 642     } else {
 643       _parallel_workers->initialize_workers();
 644     }
 645   }
 646 
 647   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 648     uintx mark_stack_size =
 649       MIN2(MarkStackSizeMax,
 650           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 651     // Verify that the calculated value for MarkStackSize is in range.
 652     // It would be nice to use the private utility routine from Arguments.
 653     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 654       warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
 655               "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 656               mark_stack_size, (uintx) 1, MarkStackSizeMax);
 657       return;
 658     }
 659     FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
 660   } else {
 661     // Verify MarkStackSize is in range.
 662     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 663       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 664         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 665           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
 666                   "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 667                   MarkStackSize, (uintx) 1, MarkStackSizeMax);
 668           return;
 669         }
 670       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 671         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 672           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
 673                   " or for MarkStackSizeMax (" UINTX_FORMAT ")",
 674                   MarkStackSize, MarkStackSizeMax);
 675           return;
 676         }
 677       }
 678     }
 679   }
 680 
 681   if (!_markStack.allocate(MarkStackSize)) {
 682     warning("Failed to allocate CM marking stack");
 683     return;
 684   }
 685 
 686   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 687   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 688 
 689   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 690   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 691 
 692   BitMap::idx_t card_bm_size = _card_bm.size();
 693 
 694   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 695   _active_tasks = _max_worker_id;
 696 
 697   size_t max_regions = (size_t) _g1h->max_regions();
 698   for (uint i = 0; i < _max_worker_id; ++i) {
 699     CMTaskQueue* task_queue = new CMTaskQueue();
 700     task_queue->initialize();
 701     _task_queues->register_queue(i, task_queue);
 702 
 703     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 704     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 705 
 706     _tasks[i] = new CMTask(i, this,
 707                            _count_marked_bytes[i],
 708                            &_count_card_bitmaps[i],
 709                            task_queue, _task_queues);
 710 
 711     _accum_task_vtime[i] = 0.0;
 712   }
 713 
 714   // Calculate the card number for the bottom of the heap. Used
 715   // in biasing indexes into the accounting card bitmaps.
 716   _heap_bottom_card_num =
 717     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 718                                 CardTableModRefBS::card_shift);
 719 
 720   // Clear all the liveness counting data
 721   clear_all_count_data();
 722 
 723   // so that the call below can read a sensible value
 724   _heap_start = (HeapWord*) heap_rs.base();
 725   set_non_marking_state();
 726   _completed_initialization = true;
 727 }
 728 
 729 void ConcurrentMark::update_g1_committed(bool force) {
 730   // If concurrent marking is not in progress, then we do not need to
 731   // update _heap_end.
 732   if (!concurrent_marking_in_progress() && !force) return;
 733 
 734   MemRegion committed = _g1h->g1_committed();
 735   assert(committed.start() == _heap_start, "start shouldn't change");
 736   HeapWord* new_end = committed.end();
 737   if (new_end > _heap_end) {
 738     // The heap has been expanded.
 739 
 740     _heap_end = new_end;
 741   }
 742   // Notice that the heap can also shrink. However, this only happens
 743   // during a Full GC (at least currently) and the entire marking
 744   // phase will bail out and the task will not be restarted. So, let's
 745   // do nothing.
 746 }
 747 
 748 void ConcurrentMark::reset() {
 749   // Starting values for these two. This should be called in a STW
 750   // phase. CM will be notified of any future g1_committed expansions,
 751   // which will happen at the end of evacuation pauses, when tasks are
 752   // inactive.
 753   MemRegion committed = _g1h->g1_committed();
 754   _heap_start = committed.start();
 755   _heap_end   = committed.end();
 756 
 757   // Separated the asserts so that we know which one fires.
 758   assert(_heap_start != NULL, "heap bounds should look ok");
 759   assert(_heap_end != NULL, "heap bounds should look ok");
 760   assert(_heap_start < _heap_end, "heap bounds should look ok");
 761 
 762   // Reset all the marking data structures and any necessary flags
 763   reset_marking_state();
 764 
 765   if (verbose_low()) {
 766     gclog_or_tty->print_cr("[global] resetting");
 767   }
 768 
 769   // We do reset all of them, since different phases will use a
 770   // different number of active threads. So, it's easiest to have all
 771   // of them ready.
 772   for (uint i = 0; i < _max_worker_id; ++i) {
 773     _tasks[i]->reset(_nextMarkBitMap);
 774   }
 775 
 776   // we need this to make sure that the flag is on during the evac
 777   // pause with initial mark piggy-backed
 778   set_concurrent_marking_in_progress();
 779 }
 780 
 781 
 782 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 783   _markStack.set_should_expand();
 784   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 785   if (clear_overflow) {
 786     clear_has_overflown();
 787   } else {
 788     assert(has_overflown(), "pre-condition");
 789   }
 790   _finger = _heap_start;
 791 
 792   for (uint i = 0; i < _max_worker_id; ++i) {
 793     CMTaskQueue* queue = _task_queues->queue(i);
 794     queue->set_empty();
 795   }
 796 }
 797 
 798 void ConcurrentMark::set_concurrency(uint active_tasks) {
 799   assert(active_tasks <= _max_worker_id, "we should not have more");
 800 
 801   _active_tasks = active_tasks;
 802   // Need to update the three data structures below according to the
 803   // number of active threads for this phase.
 804   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 805   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 806   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 807 }
 808 
 809 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 810   set_concurrency(active_tasks);
 811 
 812   _concurrent = concurrent;
 813   // We propagate this to all tasks, not just the active ones.
 814   for (uint i = 0; i < _max_worker_id; ++i)
 815     _tasks[i]->set_concurrent(concurrent);
 816 
 817   if (concurrent) {
 818     set_concurrent_marking_in_progress();
 819   } else {
 820     // We currently assume that the concurrent flag has been set to
 821     // false before we start remark. At this point we should also be
 822     // in a STW phase.
 823     assert(!concurrent_marking_in_progress(), "invariant");
 824     assert(out_of_regions(),
 825            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
 826                    p2i(_finger), p2i(_heap_end)));
 827     update_g1_committed(true);
 828   }
 829 }
 830 
 831 void ConcurrentMark::set_non_marking_state() {
 832   // We set the global marking state to some default values when we're
 833   // not doing marking.
 834   reset_marking_state();
 835   _active_tasks = 0;
 836   clear_concurrent_marking_in_progress();
 837 }
 838 
 839 ConcurrentMark::~ConcurrentMark() {
 840   // The ConcurrentMark instance is never freed.
 841   ShouldNotReachHere();
 842 }
 843 
 844 void ConcurrentMark::clearNextBitmap() {
 845   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 846   G1CollectorPolicy* g1p = g1h->g1_policy();
 847 
 848   // Make sure that the concurrent mark thread still appears to be in
 849   // the current cycle.
 850   guarantee(cmThread()->during_cycle(), "invariant");
 851 
 852   // We are finishing up the current cycle by clearing the next
 853   // marking bitmap and getting it ready for the next cycle. During
 854   // this time no other cycle can start. So, let's make sure that this
 855   // is the case.
 856   guarantee(!g1h->mark_in_progress(), "invariant");
 857 
 858   // clear the mark bitmap (no grey objects to start with).
 859   // We need to do this in chunks and offer to yield in between
 860   // each chunk.
 861   HeapWord* start  = _nextMarkBitMap->startWord();
 862   HeapWord* end    = _nextMarkBitMap->endWord();
 863   HeapWord* cur    = start;
 864   size_t chunkSize = M;
 865   while (cur < end) {
 866     HeapWord* next = cur + chunkSize;
 867     if (next > end) {
 868       next = end;
 869     }
 870     MemRegion mr(cur,next);
 871     _nextMarkBitMap->clearRange(mr);
 872     cur = next;
 873     do_yield_check();
 874 
 875     // Repeat the asserts from above. We'll do them as asserts here to
 876     // minimize their overhead on the product. However, we'll have
 877     // them as guarantees at the beginning / end of the bitmap
 878     // clearing to get some checking in the product.
 879     assert(cmThread()->during_cycle(), "invariant");
 880     assert(!g1h->mark_in_progress(), "invariant");
 881   }
 882 
 883   // Clear the liveness counting data
 884   clear_all_count_data();
 885 
 886   // Repeat the asserts from above.
 887   guarantee(cmThread()->during_cycle(), "invariant");
 888   guarantee(!g1h->mark_in_progress(), "invariant");
 889 }
 890 
 891 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 892 public:
 893   bool doHeapRegion(HeapRegion* r) {
 894     if (!r->continuesHumongous()) {
 895       r->note_start_of_marking();
 896     }
 897     return false;
 898   }
 899 };
 900 
 901 void ConcurrentMark::checkpointRootsInitialPre() {
 902   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 903   G1CollectorPolicy* g1p = g1h->g1_policy();
 904 
 905   _has_aborted = false;
 906 
 907 #ifndef PRODUCT
 908   if (G1PrintReachableAtInitialMark) {
 909     print_reachable("at-cycle-start",
 910                     VerifyOption_G1UsePrevMarking, true /* all */);
 911   }
 912 #endif
 913 
 914   // Initialise marking structures. This has to be done in a STW phase.
 915   reset();
 916 
 917   // For each region note start of marking.
 918   NoteStartOfMarkHRClosure startcl;
 919   g1h->heap_region_iterate(&startcl);
 920 }
 921 
 922 
 923 void ConcurrentMark::checkpointRootsInitialPost() {
 924   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 925 
 926   // If we force an overflow during remark, the remark operation will
 927   // actually abort and we'll restart concurrent marking. If we always
 928   // force an overflow during remark we'll never actually complete the
 929   // marking phase. So, we initialize this here, at the start of the
 930   // cycle, so that the remaining overflow number will decrease at
 931   // every remark and we'll eventually not need to cause one.
 932   force_overflow_stw()->init();
 933 
 934   // Start Concurrent Marking weak-reference discovery.
 935   ReferenceProcessor* rp = g1h->ref_processor_cm();
 936   // enable ("weak") refs discovery
 937   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 938   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 939 
 940   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 941   // This is the start of the marking cycle; we expect all
 942   // threads to have SATB queues with active set to false.
 943   satb_mq_set.set_active_all_threads(true, /* new active value */
 944                                      false /* expected_active */);
 945 
 946   _root_regions.prepare_for_scan();
 947 
 948   // update_g1_committed() will be called at the end of an evac pause
 949   // when marking is on. So, it's also called at the end of the
 950   // initial-mark pause to update the heap end, if the heap expands
 951   // during it. No need to call it here.
 952 }
 953 
 954 /*
 955  * Notice that in the next two methods, we actually leave the STS
 956  * during the barrier sync and join it immediately afterwards. If we
 957  * do not do this, the following deadlock can occur: one thread could
 958  * be in the barrier sync code, waiting for the other thread to also
 959  * sync up, whereas another one could be trying to yield, while also
 960  * waiting for the other threads to sync up too.
 961  *
 962  * Note, however, that this code is also used during remark and in
 963  * this case we should not attempt to leave / enter the STS, otherwise
 964  * we'll either hit an assert (debug / fastdebug) or deadlock
 965  * (product). So we should only leave / enter the STS if we are
 966  * operating concurrently.
 967  *
 968  * Because the thread that does the sync barrier has left the STS, it
 969  * is possible for it to be suspended for a Full GC or for an evacuation
 970  * pause to occur. This is actually safe, since entering the sync
 971  * barrier is one of the last things do_marking_step() does, and it
 972  * doesn't manipulate any data structures afterwards.
 973  */
 974 
 975 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 976   if (verbose_low()) {
 977     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 978   }
 979 
 980   if (concurrent()) {
 981     SuspendibleThreadSet::leave();
 982   }
 983 
 984   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
 985 
 986   if (concurrent()) {
 987     SuspendibleThreadSet::join();
 988   }
 989   // at this point everyone should have synced up and not be doing any
 990   // more work
 991 
 992   if (verbose_low()) {
 993     if (barrier_aborted) {
 994       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 995     } else {
 996       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 997     }
 998   }
 999 
1000   if (barrier_aborted) {
1001     // If the barrier aborted we ignore the overflow condition and
1002     // just abort the whole marking phase as quickly as possible.
1003     return;
1004   }
1005 
1006   // If we're executing the concurrent phase of marking, reset the marking
1007   // state; otherwise the marking state is reset after reference processing,
1008   // during the remark pause.
1009   // If we reset here as a result of an overflow during the remark we will
1010   // see assertion failures from any subsequent set_concurrency_and_phase()
1011   // calls.
1012   if (concurrent()) {
1013     // let the task associated with worker 0 do this
1014     if (worker_id == 0) {
1015       // task 0 is responsible for clearing the global data structures
1016       // We should be here because of an overflow. During STW we should
1017       // not clear the overflow flag since we rely on it being true when
1018       // we exit this method to abort the pause and restart concurrent
1019       // marking.
1020       reset_marking_state(true /* clear_overflow */);
1021       force_overflow()->update();
1022 
1023       if (G1Log::fine()) {
1024         gclog_or_tty->gclog_stamp(concurrent_gc_id());
1025         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1026       }
1027     }
1028   }
1029 
1030   // after this, each task should reset its own data structures and
1031   // then go into the second barrier
1032 }
1033 
1034 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1035   if (verbose_low()) {
1036     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1037   }
1038 
1039   if (concurrent()) {
1040     SuspendibleThreadSet::leave();
1041   }
1042 
1043   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1044 
1045   if (concurrent()) {
1046     SuspendibleThreadSet::join();
1047   }
1048   // at this point everything should be re-initialized and ready to go
1049 
1050   if (verbose_low()) {
1051     if (barrier_aborted) {
1052       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1053     } else {
1054       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1055     }
1056   }
1057 }
1058 
1059 #ifndef PRODUCT
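     // Debug-only (non-PRODUCT) support for artificially forcing marking
     // overflows a fixed number of times (G1ConcMarkForceOverflow) so the
     // overflow / restart-for-overflow handling can be exercised in testing.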
1060 void ForceOverflowSettings::init() {
1061   _num_remaining = G1ConcMarkForceOverflow;
1062   _force = false;
1063   update();
1064 }
1065 
1066 void ForceOverflowSettings::update() {
1067   if (_num_remaining > 0) {
1068     _num_remaining -= 1;
1069     _force = true;
1070   } else {
1071     _force = false;
1072   }
1073 }
1074 
1075 bool ForceOverflowSettings::should_force() {
1076   if (_force) {
1077     _force = false;
1078     return true;
1079   } else {
1080     return false;
1081   }
1082 }
1083 #endif // !PRODUCT
1084 
1085 class CMConcurrentMarkingTask: public AbstractGangTask {
1086 private:
1087   ConcurrentMark*       _cm;
1088   ConcurrentMarkThread* _cmt;
1089 
1090 public:
1091   void work(uint worker_id) {
1092     assert(Thread::current()->is_ConcurrentGC_thread(),
1093            "this should only be done by a conc GC thread");
1094     ResourceMark rm;
1095 
1096     double start_vtime = os::elapsedVTime();
1097 
1098     SuspendibleThreadSet::join();
1099 
1100     assert(worker_id < _cm->active_tasks(), "invariant");
1101     CMTask* the_task = _cm->task(worker_id);
1102     the_task->record_start_time();
1103     if (!_cm->has_aborted()) {
1104       do {
1105         double start_vtime_sec = os::elapsedVTime();
1106         double start_time_sec = os::elapsedTime();
1107         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1108 
1109         the_task->do_marking_step(mark_step_duration_ms,
1110                                   true  /* do_termination */,
1111                                   false /* is_serial*/);
1112 
1113         double end_time_sec = os::elapsedTime();
1114         double end_vtime_sec = os::elapsedVTime();
1115         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1116         double elapsed_time_sec = end_time_sec - start_time_sec;
1117         _cm->clear_has_overflown();
1118 
1119         bool ret = _cm->do_yield_check(worker_id);
1120 
1121         jlong sleep_time_ms;
1122         if (!_cm->has_aborted() && the_task->has_aborted()) {
1123           sleep_time_ms =
1124             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1125           SuspendibleThreadSet::leave();
1126           os::sleep(Thread::current(), sleep_time_ms, false);
1127           SuspendibleThreadSet::join();
1128         }
1129         double end_time2_sec = os::elapsedTime();
1130         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1131 
1132 #if 0
1133           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1134                                  "overhead %1.4lf",
1135                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1136                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1137           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1138                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1139 #endif
1140       } while (!_cm->has_aborted() && the_task->has_aborted());
1141     }
1142     the_task->record_end_time();
1143     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1144 
1145     SuspendibleThreadSet::leave();
1146 
1147     double end_vtime = os::elapsedVTime();
1148     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1149   }
1150 
1151   CMConcurrentMarkingTask(ConcurrentMark* cm,
1152                           ConcurrentMarkThread* cmt) :
1153       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1154 
1155   ~CMConcurrentMarkingTask() { }
1156 };
1157 
1158 // Calculates the number of active workers for a concurrent
1159 // phase.
1160 uint ConcurrentMark::calc_parallel_marking_threads() {
1161   if (G1CollectedHeap::use_parallel_gc_threads()) {
1162     uint n_conc_workers = 0;
1163     if (!UseDynamicNumberOfGCThreads ||
1164         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1165          !ForceDynamicNumberOfGCThreads)) {
1166       n_conc_workers = max_parallel_marking_threads();
1167     } else {
1168       n_conc_workers =
1169         AdaptiveSizePolicy::calc_default_active_workers(
1170                                      max_parallel_marking_threads(),
1171                                      1, /* Minimum workers */
1172                                      parallel_marking_threads(),
1173                                      Threads::number_of_non_daemon_threads());
1174       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1175       // that scaling has already gone into "_max_parallel_marking_threads".
1176     }
1177     assert(n_conc_workers > 0, "Always need at least 1");
1178     return n_conc_workers;
1179   }
1180   // If we are not running with any parallel GC threads we will not
1181   // have spawned any marking threads either. Hence the number of
1182   // concurrent workers should be 0.
1183   return 0;
1184 }
1185 
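     // Scan a single root region: apply G1RootRegionScanClosure to every object
     // between the region's bottom and top, prefetching ahead as we go.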
1186 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1187   // Currently, only survivors can be root regions.
1188   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1189   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1190 
1191   const uintx interval = PrefetchScanIntervalInBytes;
1192   HeapWord* curr = hr->bottom();
1193   const HeapWord* end = hr->top();
1194   while (curr < end) {
1195     Prefetch::read(curr, interval);
1196     oop obj = oop(curr);
1197     int size = obj->oop_iterate(&cl);
1198     assert(size == obj->size(), "sanity");
1199     curr += size;
1200   }
1201 }
1202 
1203 class CMRootRegionScanTask : public AbstractGangTask {
1204 private:
1205   ConcurrentMark* _cm;
1206 
1207 public:
1208   CMRootRegionScanTask(ConcurrentMark* cm) :
1209     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1210 
1211   void work(uint worker_id) {
1212     assert(Thread::current()->is_ConcurrentGC_thread(),
1213            "this should only be done by a conc GC thread");
1214 
1215     CMRootRegions* root_regions = _cm->root_regions();
1216     HeapRegion* hr = root_regions->claim_next();
1217     while (hr != NULL) {
1218       _cm->scanRootRegion(hr, worker_id);
1219       hr = root_regions->claim_next();
1220     }
1221   }
1222 };
1223 
1224 void ConcurrentMark::scanRootRegions() {
1225   // scan_in_progress() will have been set to true only if there was
1226   // at least one root region to scan. So, if it's false, we
1227   // should not attempt to do any further work.
1228   if (root_regions()->scan_in_progress()) {
1229     _parallel_marking_threads = calc_parallel_marking_threads();
1230     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1231            "Maximum number of marking threads exceeded");
1232     uint active_workers = MAX2(1U, parallel_marking_threads());
1233 
1234     CMRootRegionScanTask task(this);
1235     if (use_parallel_marking_threads()) {
1236       _parallel_workers->set_active_workers((int) active_workers);
1237       _parallel_workers->run_task(&task);
1238     } else {
1239       task.work(0);
1240     }
1241 
1242     // It's possible that has_aborted() is true here without actually
1243     // aborting the survivor scan earlier. This is OK as it's
1244     // mainly used for sanity checking.
1245     root_regions()->scan_finished();
1246   }
1247 }
1248 
1249 void ConcurrentMark::markFromRoots() {
1250   // we might be tempted to assert that:
1251   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1252   //        "inconsistent argument?");
1253   // However that wouldn't be right, because it's possible that
1254   // a safepoint is indeed in progress as a younger generation
1255   // stop-the-world GC happens even as we mark in this generation.
1256 
1257   _restart_for_overflow = false;
1258   force_overflow_conc()->init();
1259 
1260   // _g1h has _n_par_threads
1261   _parallel_marking_threads = calc_parallel_marking_threads();
1262   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1263     "Maximum number of marking threads exceeded");
1264 
1265   uint active_workers = MAX2(1U, parallel_marking_threads());
1266 
1267   // Parallel task terminator is set in "set_concurrency_and_phase()"
1268   set_concurrency_and_phase(active_workers, true /* concurrent */);
1269 
1270   CMConcurrentMarkingTask markingTask(this, cmThread());
1271   if (use_parallel_marking_threads()) {
1272     _parallel_workers->set_active_workers((int)active_workers);
1273     // Don't set _n_par_threads because it affects MT in process_strong_roots()
1274     // and the decisions on that MT processing are made elsewhere.
1275     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1276     _parallel_workers->run_task(&markingTask);
1277   } else {
1278     markingTask.work(0);
1279   }
1280   print_stats();
1281 }
1282 
1283 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1284   // world is stopped at this checkpoint
1285   assert(SafepointSynchronize::is_at_safepoint(),
1286          "world should be stopped");
1287 
1288   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1289 
1290   // If a full collection has happened, we shouldn't do this.
1291   if (has_aborted()) {
1292     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1293     return;
1294   }
1295 
1296   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1297 
1298   if (VerifyDuringGC) {
1299     HandleMark hm;  // handle scope
1300     Universe::heap()->prepare_for_verify();
1301     Universe::verify(VerifyOption_G1UsePrevMarking,
1302                      " VerifyDuringGC:(before)");
1303   }
1304 
1305   G1CollectorPolicy* g1p = g1h->g1_policy();
1306   g1p->record_concurrent_mark_remark_start();
1307 
1308   double start = os::elapsedTime();
1309 
1310   checkpointRootsFinalWork();
1311 
1312   double mark_work_end = os::elapsedTime();
1313 
1314   weakRefsWork(clear_all_soft_refs);
1315 
1316   if (has_overflown()) {
1317     // Oops.  We overflowed.  Restart concurrent marking.
1318     _restart_for_overflow = true;
1319     if (G1TraceMarkStackOverflow) {
1320       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1321     }
1322 
1323     // Verify the heap w.r.t. the previous marking bitmap.
1324     if (VerifyDuringGC) {
1325       HandleMark hm;  // handle scope
1326       Universe::heap()->prepare_for_verify();
1327       Universe::verify(VerifyOption_G1UsePrevMarking,
1328                        " VerifyDuringGC:(overflow)");
1329     }
1330 
1331     // Clear the marking state because we will be restarting
1332     // marking due to overflowing the global mark stack.
1333     reset_marking_state();
1334   } else {
1335     // Aggregate the per-task counting data that we have accumulated
1336     // while marking.
1337     aggregate_count_data();
1338 
1339     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1340     // We're done with marking.
1341     // This is the end of the marking cycle; we expect all
1342     // threads to have SATB queues with active set to true.
1343     satb_mq_set.set_active_all_threads(false, /* new active value */
1344                                        true /* expected_active */);
1345 
1346     if (VerifyDuringGC) {
1347       HandleMark hm;  // handle scope
1348       Universe::heap()->prepare_for_verify();
1349       Universe::verify(VerifyOption_G1UseNextMarking,
1350                        " VerifyDuringGC:(after)");
1351     }
1352     assert(!restart_for_overflow(), "sanity");
1353     // Completely reset the marking state since marking completed
1354     set_non_marking_state();
1355   }
1356 
1357   // Expand the marking stack, if we have to and if we can.
1358   if (_markStack.should_expand()) {
1359     _markStack.expand();
1360   }
1361 
1362   // Statistics
1363   double now = os::elapsedTime();
1364   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1365   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1366   _remark_times.add((now - start) * 1000.0);
1367 
1368   g1p->record_concurrent_mark_remark_end();
1369 
1370   G1CMIsAliveClosure is_alive(g1h);
1371   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1372 }
1373 
1374 // Base class of the closures that finalize and verify the
1375 // liveness counting data.
1376 class CMCountDataClosureBase: public HeapRegionClosure {
1377 protected:
1378   G1CollectedHeap* _g1h;
1379   ConcurrentMark* _cm;
1380   CardTableModRefBS* _ct_bs;
1381 
1382   BitMap* _region_bm;
1383   BitMap* _card_bm;
1384 
1385   // Takes a region that's not empty (i.e., it has at least one
1386   // live object in it) and sets its corresponding bit on the region
1387   // bitmap to 1. If the region is "starts humongous" it will also set
1388   // to 1 the bits on the region bitmap that correspond to its
1389   // associated "continues humongous" regions.
1390   void set_bit_for_region(HeapRegion* hr) {
1391     assert(!hr->continuesHumongous(), "should have filtered those out");
1392 
1393     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1394     if (!hr->startsHumongous()) {
1395       // Normal (non-humongous) case: just set the bit.
1396       _region_bm->par_at_put(index, true);
1397     } else {
1398       // Starts humongous case: calculate how many regions are part of
1399       // this humongous region and then set the bit range.
1400       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1401       _region_bm->par_at_put_range(index, end_index, true);
1402     }
1403   }
1404 
1405 public:
1406   CMCountDataClosureBase(G1CollectedHeap* g1h,
1407                          BitMap* region_bm, BitMap* card_bm):
1408     _g1h(g1h), _cm(g1h->concurrent_mark()),
1409     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1410     _region_bm(region_bm), _card_bm(card_bm) { }
1411 };
1412 
1413 // Closure that calculates the # live objects per region. Used
1414 // for verification purposes during the cleanup pause.
1415 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1416   CMBitMapRO* _bm;
1417   size_t _region_marked_bytes;
1418 
1419 public:
1420   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1421                          BitMap* region_bm, BitMap* card_bm) :
1422     CMCountDataClosureBase(g1h, region_bm, card_bm),
1423     _bm(bm), _region_marked_bytes(0) { }
1424 
1425   bool doHeapRegion(HeapRegion* hr) {
1426 
1427     if (hr->continuesHumongous()) {
1428       // We will ignore these here and process them when their
1429       // associated "starts humongous" region is processed (see
1430       // set_bit_for_heap_region()). Note that we cannot rely on their
1431       // associated "starts humongous" region to have their bit set to
1432       // 1 since, due to the region chunking in the parallel region
1433       // iteration, a "continues humongous" region might be visited
1434       // before its associated "starts humongous".
1435       return false;
1436     }
1437 
1438     HeapWord* ntams = hr->next_top_at_mark_start();
1439     HeapWord* start = hr->bottom();
1440 
1441     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1442            err_msg("Preconditions not met - "
1443                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1444                    p2i(start), p2i(ntams), p2i(hr->end())));
1445 
1446     // Find the first marked object at or after "start".
1447     start = _bm->getNextMarkedWordAddress(start, ntams);
1448 
1449     size_t marked_bytes = 0;
1450 
1451     while (start < ntams) {
1452       oop obj = oop(start);
1453       int obj_sz = obj->size();
1454       HeapWord* obj_end = start + obj_sz;
1455 
1456       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1457       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1458 
1459       // Note: if we're looking at the last region in the heap, obj_end
1460       // could actually be just beyond the end of the heap; end_idx
1461       // will then correspond to a (non-existent) card that is also
1462       // just beyond the heap.
1463       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1464         // end of object is not card aligned - increment to cover
1465         // all the cards spanned by the object
1466         end_idx += 1;
1467       }
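      // Illustrative example (assuming the usual 512-byte card size): a
      // 600-byte object starting at a card boundary spans two cards; its
      // obj_end is not card aligned, so end_idx was bumped above to make
      // the half-open range [start_idx, end_idx) cover both cards.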
1468 
1469       // Set the bits in the card BM for the cards spanned by this object.
1470       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1471 
1472       // Add the size of this object to the number of marked bytes.
1473       marked_bytes += (size_t)obj_sz * HeapWordSize;
1474 
1475       // Find the next marked object after this one.
1476       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1477     }
1478 
1479     // Mark the allocated-since-marking portion...
1480     HeapWord* top = hr->top();
1481     if (ntams < top) {
1482       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1483       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1484 
1485       // Note: if we're looking at the last region in the heap, top
1486       // could actually be just beyond the end of the heap; end_idx
1487       // will then correspond to a (non-existent) card that is also
1488       // just beyond the heap.
1489       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1490         // top is not card aligned - increment to cover all the
1491         // cards spanned by the allocated-since-marking portion
1492         end_idx += 1;
1493       }
1494       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1495 
1496       // This definitely means the region has live objects.
1497       set_bit_for_region(hr);
1498     }
1499 
1500     // Update the live region bitmap.
1501     if (marked_bytes > 0) {
1502       set_bit_for_region(hr);
1503     }
1504 
1505     // Set the marked bytes for the current region so that
1506     // it can be queried by a calling verification routine
1507     _region_marked_bytes = marked_bytes;
1508 
1509     return false;
1510   }
1511 
1512   size_t region_marked_bytes() const { return _region_marked_bytes; }
1513 };
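// Note on usage: VerifyLiveObjectDataHRClosure below applies this
// closure to one region at a time and then queries region_marked_bytes(),
// which therefore only reflects the most recently visited region.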
1514 
1515 // Heap region closure used for verifying the counting data
1516 // that was accumulated concurrently and aggregated during
1517 // the remark pause. This closure is applied to the heap
1518 // regions during the STW cleanup pause.
1519 
1520 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1521   G1CollectedHeap* _g1h;
1522   ConcurrentMark* _cm;
1523   CalcLiveObjectsClosure _calc_cl;
1524   BitMap* _region_bm;   // Region BM to be verified
1525   BitMap* _card_bm;     // Card BM to be verified
1526   bool _verbose;        // verbose output?
1527 
1528   BitMap* _exp_region_bm; // Expected Region BM values
1529   BitMap* _exp_card_bm;   // Expected card BM values
1530 
1531   int _failures;
1532 
1533 public:
1534   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1535                                 BitMap* region_bm,
1536                                 BitMap* card_bm,
1537                                 BitMap* exp_region_bm,
1538                                 BitMap* exp_card_bm,
1539                                 bool verbose) :
1540     _g1h(g1h), _cm(g1h->concurrent_mark()),
1541     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1542     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1543     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1544     _failures(0) { }
1545 
1546   int failures() const { return _failures; }
1547 
1548   bool doHeapRegion(HeapRegion* hr) {
1549     if (hr->continuesHumongous()) {
1550       // We will ignore these here and process them when their
1551       // associated "starts humongous" region is processed (see
1552       // set_bit_for_heap_region()). Note that we cannot rely on their
1553       // associated "starts humongous" region to have their bit set to
1554       // 1 since, due to the region chunking in the parallel region
1555       // iteration, a "continues humongous" region might be visited
1556       // before its associated "starts humongous".
1557       return false;
1558     }
1559 
1560     int failures = 0;
1561 
1562     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1563     // this region and set the corresponding bits in the expected region
1564     // and card bitmaps.
1565     bool res = _calc_cl.doHeapRegion(hr);
1566     assert(res == false, "should be continuing");
1567 
1568     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1569                     Mutex::_no_safepoint_check_flag);
1570 
1571     // Verify the marked bytes for this region.
1572     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1573     size_t act_marked_bytes = hr->next_marked_bytes();
1574 
1575     // We're not OK if expected marked bytes > actual marked bytes. It means
1576     // we have missed accounting some objects during the actual marking.
1577     if (exp_marked_bytes > act_marked_bytes) {
1578       if (_verbose) {
1579         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1580                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1581                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1582       }
1583       failures += 1;
1584     }
1585 
1586     // Verify the bit, for this region, in the actual and expected
1587     // (which was just calculated) region bit maps.
1588     // We're not OK if the bit in the calculated expected region
1589     // bitmap is set and the bit in the actual region bitmap is not.
1590     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1591 
1592     bool expected = _exp_region_bm->at(index);
1593     bool actual = _region_bm->at(index);
1594     if (expected && !actual) {
1595       if (_verbose) {
1596         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1597                                "expected: %s, actual: %s",
1598                                hr->hrs_index(),
1599                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1600       }
1601       failures += 1;
1602     }
1603 
1604     // Verify that the card bit maps for the cards spanned by the current
1605     // region match. We have an error if we have a set bit in the expected
1606     // bit map and the corresponding bit in the actual bitmap is not set.
1607 
1608     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1609     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1610 
1611     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1612       expected = _exp_card_bm->at(i);
1613       actual = _card_bm->at(i);
1614 
1615       if (expected && !actual) {
1616         if (_verbose) {
1617           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1618                                  "expected: %s, actual: %s",
1619                                  hr->hrs_index(), i,
1620                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1621         }
1622         failures += 1;
1623       }
1624     }
1625 
1626     if (failures > 0 && _verbose)  {
1627       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1628                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1629                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1630                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1631     }
1632 
1633     _failures += failures;
1634 
1635     // We could stop iteration over the heap when we
1636     // find the first violating region by returning true.
1637     return false;
1638   }
1639 };
1640 
1641 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1642 protected:
1643   G1CollectedHeap* _g1h;
1644   ConcurrentMark* _cm;
1645   BitMap* _actual_region_bm;
1646   BitMap* _actual_card_bm;
1647 
1648   uint    _n_workers;
1649 
1650   BitMap* _expected_region_bm;
1651   BitMap* _expected_card_bm;
1652 
1653   int  _failures;
1654   bool _verbose;
1655 
1656 public:
1657   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1658                             BitMap* region_bm, BitMap* card_bm,
1659                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1660     : AbstractGangTask("G1 verify final counting"),
1661       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1662       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1663       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1664       _failures(0), _verbose(false),
1665       _n_workers(0) {
1666     assert(VerifyDuringGC, "don't call this otherwise");
1667 
1668     // Use the value already set as the number of active threads
1669     // in the call to run_task().
1670     if (G1CollectedHeap::use_parallel_gc_threads()) {
1671       assert( _g1h->workers()->active_workers() > 0,
1672         "Should have been previously set");
1673       _n_workers = _g1h->workers()->active_workers();
1674     } else {
1675       _n_workers = 1;
1676     }
1677 
1678     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1679     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1680 
1681     _verbose = _cm->verbose_medium();
1682   }
1683 
1684   void work(uint worker_id) {
1685     assert(worker_id < _n_workers, "invariant");
1686 
1687     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1688                                             _actual_region_bm, _actual_card_bm,
1689                                             _expected_region_bm,
1690                                             _expected_card_bm,
1691                                             _verbose);
1692 
1693     if (G1CollectedHeap::use_parallel_gc_threads()) {
1694       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1695                                             worker_id,
1696                                             _n_workers,
1697                                             HeapRegion::VerifyCountClaimValue);
1698     } else {
1699       _g1h->heap_region_iterate(&verify_cl);
1700     }
1701 
1702     Atomic::add(verify_cl.failures(), &_failures);
1703   }
1704 
1705   int failures() const { return _failures; }
1706 };
1707 
1708 // Closure that finalizes the liveness counting data.
1709 // Used during the cleanup pause.
1710 // Sets the bits corresponding to the interval [NTAMS, top]
1711 // (which contains the implicitly live objects) in the
1712 // card liveness bitmap. Also sets the bit for each region,
1713 // containing live data, in the region liveness bitmap.
1714 
1715 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1716  public:
1717   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1718                               BitMap* region_bm,
1719                               BitMap* card_bm) :
1720     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1721 
1722   bool doHeapRegion(HeapRegion* hr) {
1723 
1724     if (hr->continuesHumongous()) {
1725       // We will ignore these here and process them when their
1726       // associated "starts humongous" region is processed (see
1727       // set_bit_for_heap_region()). Note that we cannot rely on their
1728       // associated "starts humongous" region to have their bit set to
1729       // 1 since, due to the region chunking in the parallel region
1730       // iteration, a "continues humongous" region might be visited
1731       // before its associated "starts humongous".
1732       return false;
1733     }
1734 
1735     HeapWord* ntams = hr->next_top_at_mark_start();
1736     HeapWord* top   = hr->top();
1737 
1738     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1739 
1740     // Mark the allocated-since-marking portion...
1741     if (ntams < top) {
1742       // This definitely means the region has live objects.
1743       set_bit_for_region(hr);
1744 
1745       // Now set the bits in the card bitmap for [ntams, top)
1746       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1747       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1748 
1749       // Note: if we're looking at the last region in the heap, top
1750       // could actually be just beyond the end of the heap; end_idx
1751       // will then correspond to a (non-existent) card that is also
1752       // just beyond the heap.
1753       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1754         // top is not card aligned - increment to cover all the
1755         // cards spanned by the allocated-since-marking portion
1756         end_idx += 1;
1757       }
1758 
1759       assert(end_idx <= _card_bm->size(),
1760              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1761                      end_idx, _card_bm->size()));
1762       assert(start_idx < _card_bm->size(),
1763              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1764                      start_idx, _card_bm->size()));
1765 
1766       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1767     }
1768 
1769     // Set the bit for the region if it contains live data
1770     if (hr->next_marked_bytes() > 0) {
1771       set_bit_for_region(hr);
1772     }
1773 
1774     return false;
1775   }
1776 };
1777 
1778 class G1ParFinalCountTask: public AbstractGangTask {
1779 protected:
1780   G1CollectedHeap* _g1h;
1781   ConcurrentMark* _cm;
1782   BitMap* _actual_region_bm;
1783   BitMap* _actual_card_bm;
1784 
1785   uint    _n_workers;
1786 
1787 public:
1788   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1789     : AbstractGangTask("G1 final counting"),
1790       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1791       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1792       _n_workers(0) {
1793     // Use the value already set as the number of active threads
1794     // in the call to run_task().
1795     if (G1CollectedHeap::use_parallel_gc_threads()) {
1796       assert( _g1h->workers()->active_workers() > 0,
1797         "Should have been previously set");
1798       _n_workers = _g1h->workers()->active_workers();
1799     } else {
1800       _n_workers = 1;
1801     }
1802   }
1803 
1804   void work(uint worker_id) {
1805     assert(worker_id < _n_workers, "invariant");
1806 
1807     FinalCountDataUpdateClosure final_update_cl(_g1h,
1808                                                 _actual_region_bm,
1809                                                 _actual_card_bm);
1810 
1811     if (G1CollectedHeap::use_parallel_gc_threads()) {
1812       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1813                                             worker_id,
1814                                             _n_workers,
1815                                             HeapRegion::FinalCountClaimValue);
1816     } else {
1817       _g1h->heap_region_iterate(&final_update_cl);
1818     }
1819   }
1820 };
1821 
1822 class G1ParNoteEndTask;
1823 
1824 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1825   G1CollectedHeap* _g1;
1826   size_t _max_live_bytes;
1827   uint _regions_claimed;
1828   size_t _freed_bytes;
1829   FreeRegionList* _local_cleanup_list;
1830   HeapRegionSetCount _old_regions_removed;
1831   HeapRegionSetCount _humongous_regions_removed;
1832   HRRSCleanupTask* _hrrs_cleanup_task;
1833   double _claimed_region_time;
1834   double _max_region_time;
1835 
1836 public:
1837   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1838                              FreeRegionList* local_cleanup_list,
1839                              HRRSCleanupTask* hrrs_cleanup_task) :
1840     _g1(g1),
1841     _max_live_bytes(0), _regions_claimed(0),
1842     _freed_bytes(0),
1843     _claimed_region_time(0.0), _max_region_time(0.0),
1844     _local_cleanup_list(local_cleanup_list),
1845     _old_regions_removed(),
1846     _humongous_regions_removed(),
1847     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1848 
1849   size_t freed_bytes() { return _freed_bytes; }
1850   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1851   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1852 
1853   bool doHeapRegion(HeapRegion *hr) {
1854     if (hr->continuesHumongous()) {
1855       return false;
1856     }
1857     // We use a claim value of zero here because all regions
1858     // were claimed with value 1 in the FinalCount task.
1859     _g1->reset_gc_time_stamps(hr);
1860     double start = os::elapsedTime();
1861     _regions_claimed++;
1862     hr->note_end_of_marking();
1863     _max_live_bytes += hr->max_live_bytes();
1864 
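    // A region can be reclaimed immediately if it is in use, has no live
    // data according to the marking (max_live_bytes() == 0) and is not
    // young; otherwise we only do the remembered set cleanup work for it.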
1865     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1866       _freed_bytes += hr->used();
1867       hr->set_containing_set(NULL);
1868       if (hr->isHumongous()) {
1869         assert(hr->startsHumongous(), "we should only see starts humongous");
1870         _humongous_regions_removed.increment(1u, hr->capacity());
1871         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1872       } else {
1873         _old_regions_removed.increment(1u, hr->capacity());
1874         _g1->free_region(hr, _local_cleanup_list, true);
1875       }
1876     } else {
1877       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1878     }
1879 
1880     double region_time = (os::elapsedTime() - start);
1881     _claimed_region_time += region_time;
1882     if (region_time > _max_region_time) {
1883       _max_region_time = region_time;
1884     }
1885     return false;
1886   }
1887 
1888   size_t max_live_bytes() { return _max_live_bytes; }
1889   uint regions_claimed() { return _regions_claimed; }
1890   double claimed_region_time_sec() { return _claimed_region_time; }
1891   double max_region_time_sec() { return _max_region_time; }
1892 };
1893 
1894 class G1ParNoteEndTask: public AbstractGangTask {
1895   friend class G1NoteEndOfConcMarkClosure;
1896 
1897 protected:
1898   G1CollectedHeap* _g1h;
1899   size_t _max_live_bytes;
1900   size_t _freed_bytes;
1901   FreeRegionList* _cleanup_list;
1902 
1903 public:
1904   G1ParNoteEndTask(G1CollectedHeap* g1h,
1905                    FreeRegionList* cleanup_list) :
1906     AbstractGangTask("G1 note end"), _g1h(g1h),
1907     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1908 
1909   void work(uint worker_id) {
1910     double start = os::elapsedTime();
1911     FreeRegionList local_cleanup_list("Local Cleanup List");
1912     HRRSCleanupTask hrrs_cleanup_task;
1913     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1914                                            &hrrs_cleanup_task);
1915     if (G1CollectedHeap::use_parallel_gc_threads()) {
1916       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1917                                             _g1h->workers()->active_workers(),
1918                                             HeapRegion::NoteEndClaimValue);
1919     } else {
1920       _g1h->heap_region_iterate(&g1_note_end);
1921     }
1922     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1923 
1924     // Now update the lists
1925     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1926     {
1927       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1928       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1929       _max_live_bytes += g1_note_end.max_live_bytes();
1930       _freed_bytes += g1_note_end.freed_bytes();
1931 
1932       // If we iterate over the global cleanup list at the end of
1933       // cleanup to do this printing we will not guarantee to only
1934       // generate output for the newly-reclaimed regions (the list
1935       // might not be empty at the beginning of cleanup; we might
1936       // still be working on its previous contents). So we do the
1937       // printing here, before we append the new regions to the global
1938       // cleanup list.
1939 
1940       G1HRPrinter* hr_printer = _g1h->hr_printer();
1941       if (hr_printer->is_active()) {
1942         FreeRegionListIterator iter(&local_cleanup_list);
1943         while (iter.more_available()) {
1944           HeapRegion* hr = iter.get_next();
1945           hr_printer->cleanup(hr);
1946         }
1947       }
1948 
1949       _cleanup_list->add_ordered(&local_cleanup_list);
1950       assert(local_cleanup_list.is_empty(), "post-condition");
1951 
1952       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1953     }
1954   }
1955   size_t max_live_bytes() { return _max_live_bytes; }
1956   size_t freed_bytes() { return _freed_bytes; }
1957 };
1958 
1959 class G1ParScrubRemSetTask: public AbstractGangTask {
1960 protected:
1961   G1RemSet* _g1rs;
1962   BitMap* _region_bm;
1963   BitMap* _card_bm;
1964 public:
1965   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1966                        BitMap* region_bm, BitMap* card_bm) :
1967     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1968     _region_bm(region_bm), _card_bm(card_bm) { }
1969 
1970   void work(uint worker_id) {
1971     if (G1CollectedHeap::use_parallel_gc_threads()) {
1972       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1973                        HeapRegion::ScrubRemSetClaimValue);
1974     } else {
1975       _g1rs->scrub(_region_bm, _card_bm);
1976     }
1977   }
1978 
1979 };
1980 
1981 void ConcurrentMark::cleanup() {
1982   // world is stopped at this checkpoint
1983   assert(SafepointSynchronize::is_at_safepoint(),
1984          "world should be stopped");
1985   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1986 
1987   // If a full collection has happened, we shouldn't do this.
1988   if (has_aborted()) {
1989     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1990     return;
1991   }
1992 
1993   g1h->verify_region_sets_optional();
1994 
1995   if (VerifyDuringGC) {
1996     HandleMark hm;  // handle scope
1997     Universe::heap()->prepare_for_verify();
1998     Universe::verify(VerifyOption_G1UsePrevMarking,
1999                      " VerifyDuringGC:(before)");
2000   }
2001 
2002   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2003   g1p->record_concurrent_mark_cleanup_start();
2004 
2005   double start = os::elapsedTime();
2006 
2007   HeapRegionRemSet::reset_for_cleanup_tasks();
2008 
2009   uint n_workers;
2010 
2011   // Do counting once more with the world stopped for good measure.
2012   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2013 
2014   if (G1CollectedHeap::use_parallel_gc_threads()) {
2015     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2016            "sanity check");
2017 
2018     g1h->set_par_threads();
2019     n_workers = g1h->n_par_threads();
2020     assert(g1h->n_par_threads() == n_workers,
2021            "Should not have been reset");
2022     g1h->workers()->run_task(&g1_par_count_task);
2023     // Done with the parallel phase so reset to 0.
2024     g1h->set_par_threads(0);
2025 
2026     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2027            "sanity check");
2028   } else {
2029     n_workers = 1;
2030     g1_par_count_task.work(0);
2031   }
2032 
2033   if (VerifyDuringGC) {
2034     // Verify that the counting data accumulated during marking matches
2035     // that calculated by walking the marking bitmap.
2036 
2037     // Bitmaps to hold expected values
2038     BitMap expected_region_bm(_region_bm.size(), false);
2039     BitMap expected_card_bm(_card_bm.size(), false);
2040 
2041     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2042                                                  &_region_bm,
2043                                                  &_card_bm,
2044                                                  &expected_region_bm,
2045                                                  &expected_card_bm);
2046 
2047     if (G1CollectedHeap::use_parallel_gc_threads()) {
2048       g1h->set_par_threads((int)n_workers);
2049       g1h->workers()->run_task(&g1_par_verify_task);
2050       // Done with the parallel phase so reset to 0.
2051       g1h->set_par_threads(0);
2052 
2053       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2054              "sanity check");
2055     } else {
2056       g1_par_verify_task.work(0);
2057     }
2058 
2059     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2060   }
2061 
2062   size_t start_used_bytes = g1h->used();
2063   g1h->set_marking_complete();
2064 
2065   double count_end = os::elapsedTime();
2066   double this_final_counting_time = (count_end - start);
2067   _total_counting_time += this_final_counting_time;
2068 
2069   if (G1PrintRegionLivenessInfo) {
2070     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2071     _g1h->heap_region_iterate(&cl);
2072   }
2073 
2074   // Install newly created mark bitMap as "prev".
2075   swapMarkBitMaps();
2076 
2077   g1h->reset_gc_time_stamp();
2078 
2079   // Note end of marking in all heap regions.
2080   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2081   if (G1CollectedHeap::use_parallel_gc_threads()) {
2082     g1h->set_par_threads((int)n_workers);
2083     g1h->workers()->run_task(&g1_par_note_end_task);
2084     g1h->set_par_threads(0);
2085 
2086     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2087            "sanity check");
2088   } else {
2089     g1_par_note_end_task.work(0);
2090   }
2091   g1h->check_gc_time_stamps();
2092 
2093   if (!cleanup_list_is_empty()) {
2094     // The cleanup list is not empty, so we'll have to process it
2095     // concurrently. Notify anyone else that might be wanting free
2096     // regions that there will be more free regions coming soon.
2097     g1h->set_free_regions_coming();
2098   }
2099 
2100   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
2101   // call below, since it affects the metric by which we sort the heap regions.
2102   if (G1ScrubRemSets) {
2103     double rs_scrub_start = os::elapsedTime();
2104     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2105     if (G1CollectedHeap::use_parallel_gc_threads()) {
2106       g1h->set_par_threads((int)n_workers);
2107       g1h->workers()->run_task(&g1_par_scrub_rs_task);
2108       g1h->set_par_threads(0);
2109 
2110       assert(g1h->check_heap_region_claim_values(
2111                                             HeapRegion::ScrubRemSetClaimValue),
2112              "sanity check");
2113     } else {
2114       g1_par_scrub_rs_task.work(0);
2115     }
2116 
2117     double rs_scrub_end = os::elapsedTime();
2118     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2119     _total_rs_scrub_time += this_rs_scrub_time;
2120   }
2121 
2122   // this will also free any regions totally full of garbage objects,
2123   // and sort the regions.
2124   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2125 
2126   // Statistics.
2127   double end = os::elapsedTime();
2128   _cleanup_times.add((end - start) * 1000.0);
2129 
2130   if (G1Log::fine()) {
2131     g1h->print_size_transition(gclog_or_tty,
2132                                start_used_bytes,
2133                                g1h->used(),
2134                                g1h->capacity());
2135   }
2136 
2137   // Clean up will have freed any regions completely full of garbage.
2138   // Update the soft reference policy with the new heap occupancy.
2139   Universe::update_heap_info_at_gc();
2140 
2141   // We need to make this be a "collection" so any collection pause that
2142   // races with it goes around and waits for completeCleanup to finish.
2143   g1h->increment_total_collections();
2144 
2145   // We reclaimed old regions so we should calculate the sizes to make
2146   // sure we update the old gen/space data.
2147   g1h->g1mm()->update_sizes();
2148 
2149   if (VerifyDuringGC) {
2150     HandleMark hm;  // handle scope
2151     Universe::heap()->prepare_for_verify();
2152     Universe::verify(VerifyOption_G1UsePrevMarking,
2153                      " VerifyDuringGC:(after)");
2154   }
2155 
2156   g1h->verify_region_sets_optional();
2157   g1h->trace_heap_after_concurrent_cycle();
2158 }
2159 
2160 void ConcurrentMark::completeCleanup() {
2161   if (has_aborted()) return;
2162 
2163   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2164 
2165   _cleanup_list.verify_optional();
2166   FreeRegionList tmp_free_list("Tmp Free List");
2167 
2168   if (G1ConcRegionFreeingVerbose) {
2169     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2170                            "cleanup list has %u entries",
2171                            _cleanup_list.length());
2172   }
2173 
2174   // No one else should be accessing the _cleanup_list at this point,
2175   // so it's not necessary to take any locks.
2176   while (!_cleanup_list.is_empty()) {
2177     HeapRegion* hr = _cleanup_list.remove_head();
2178     assert(hr != NULL, "Got NULL from a non-empty list");
2179     hr->par_clear();
2180     tmp_free_list.add_ordered(hr);
2181 
2182     // Instead of adding one region at a time to the secondary_free_list,
2183     // we accumulate them in the local list and move them a few at a
2184     // time. This also cuts down on the number of notify_all() calls
2185     // we do during this process. We'll also append the local list when
2186     // _cleanup_list is empty (which means we just removed the last
2187     // region from the _cleanup_list).
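    // For illustration (the value is an assumption, not taken from this
    // file): with G1SecondaryFreeListAppendLength == 5, regions would be
    // handed to the secondary_free_list in batches of five, plus one
    // final, possibly shorter, batch when the cleanup list empties.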
2188     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2189         _cleanup_list.is_empty()) {
2190       if (G1ConcRegionFreeingVerbose) {
2191         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2192                                "appending %u entries to the secondary_free_list, "
2193                                "cleanup list still has %u entries",
2194                                tmp_free_list.length(),
2195                                _cleanup_list.length());
2196       }
2197 
2198       {
2199         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2200         g1h->secondary_free_list_add(&tmp_free_list);
2201         SecondaryFreeList_lock->notify_all();
2202       }
2203 
2204       if (G1StressConcRegionFreeing) {
2205         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2206           os::sleep(Thread::current(), (jlong) 1, false);
2207         }
2208       }
2209     }
2210   }
2211   assert(tmp_free_list.is_empty(), "post-condition");
2212 }
2213 
2214 // Supporting Object and Oop closures for reference discovery
2215 // and processing during marking
2216 
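// A non-NULL object is treated as alive here if it lies outside the G1
// reserved heap (and is therefore not covered by this marking), or if the
// current marking information does not show it to be dead (!is_obj_ill()).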
2217 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2218   HeapWord* addr = (HeapWord*)obj;
2219   return addr != NULL &&
2220          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2221 }
2222 
2223 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2224 // Uses the CMTask associated with a worker thread (for serial reference
2225 // processing the CMTask for worker 0 is used) to preserve (mark) and
2226 // trace referent objects.
2227 //
2228 // Using the CMTask and embedded local queues avoids having the worker
2229 // threads operating on the global mark stack. This reduces the risk
2230 // of overflowing the stack - which we would rather avoid at this late
2231 // stage. Also using the tasks' local queues removes the potential
2232 // of the workers interfering with each other that could occur if
2233 // operating on the global stack.
2234 
2235 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2236   ConcurrentMark* _cm;
2237   CMTask*         _task;
2238   int             _ref_counter_limit;
2239   int             _ref_counter;
2240   bool            _is_serial;
2241  public:
2242   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2243     _cm(cm), _task(task), _is_serial(is_serial),
2244     _ref_counter_limit(G1RefProcDrainInterval) {
2245     assert(_ref_counter_limit > 0, "sanity");
2246     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2247     _ref_counter = _ref_counter_limit;
2248   }
2249 
2250   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2251   virtual void do_oop(      oop* p) { do_oop_work(p); }
2252 
2253   template <class T> void do_oop_work(T* p) {
2254     if (!_cm->has_overflown()) {
2255       oop obj = oopDesc::load_decode_heap_oop(p);
2256       if (_cm->verbose_high()) {
2257         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2258                                "*"PTR_FORMAT" = "PTR_FORMAT,
2259                                _task->worker_id(), p2i(p), p2i((void*) obj));
2260       }
2261 
2262       _task->deal_with_reference(obj);
2263       _ref_counter--;
2264 
2265       if (_ref_counter == 0) {
2266         // We have dealt with _ref_counter_limit references, pushing them
2267         // and objects reachable from them on to the local stack (and
2268         // possibly the global stack). Call CMTask::do_marking_step() to
2269         // process these entries.
2270         //
2271         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2272         // there's nothing more to do (i.e. we're done with the entries that
2273         // were pushed as a result of the CMTask::deal_with_reference() calls
2274         // above) or we overflow.
2275         //
2276         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2277         // flag while there may still be some work to do. (See the comment at
2278         // the beginning of CMTask::do_marking_step() for those conditions -
2279         // one of which is reaching the specified time target.) It is only
2280         // when CMTask::do_marking_step() returns without setting the
2281         // has_aborted() flag that the marking step has completed.
2282         do {
2283           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2284           _task->do_marking_step(mark_step_duration_ms,
2285                                  false      /* do_termination */,
2286                                  _is_serial);
2287         } while (_task->has_aborted() && !_cm->has_overflown());
2288         _ref_counter = _ref_counter_limit;
2289       }
2290     } else {
2291       if (_cm->verbose_high()) {
2292          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2293       }
2294     }
2295   }
2296 };
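// Usage note: in parallel reference processing each worker creates its own
// instance of the closure above with is_serial == false (see
// G1CMRefProcTaskProxy::work() below); the serial path uses the CMTask for
// worker 0 with is_serial == true (see ConcurrentMark::weakRefsWork()).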
2297 
2298 // 'Drain' oop closure used by both serial and parallel reference processing.
2299 // Uses the CMTask associated with a given worker thread (for serial
2300 // reference processing the CMTask for worker 0 is used). Calls the
2301 // do_marking_step routine, with an unbelievably large timeout value,
2302 // to drain the marking data structures of the remaining entries
2303 // added by the 'keep alive' oop closure above.
2304 
2305 class G1CMDrainMarkingStackClosure: public VoidClosure {
2306   ConcurrentMark* _cm;
2307   CMTask*         _task;
2308   bool            _is_serial;
2309  public:
2310   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2311     _cm(cm), _task(task), _is_serial(is_serial) {
2312     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2313   }
2314 
2315   void do_void() {
2316     do {
2317       if (_cm->verbose_high()) {
2318         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2319                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2320       }
2321 
2322       // We call CMTask::do_marking_step() to completely drain the local
2323       // and global marking stacks of entries pushed by the 'keep alive'
2324       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2325       //
2326       // CMTask::do_marking_step() is called in a loop, which we'll exit
2327       // if there's nothing more to do (i.e. we've completely drained the
2328       // entries that were pushed as a result of applying the 'keep alive'
2329       // closure to the entries on the discovered ref lists) or we overflow
2330       // the global marking stack.
2331       //
2332       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2333       // flag while there may still be some work to do. (See the comment at
2334       // the beginning of CMTask::do_marking_step() for those conditions -
2335       // one of which is reaching the specified time target.) It is only
2336       // when CMTask::do_marking_step() returns without setting the
2337       // has_aborted() flag that the marking step has completed.
2338 
2339       _task->do_marking_step(1000000000.0 /* something very large */,
2340                              true         /* do_termination */,
2341                              _is_serial);
2342     } while (_task->has_aborted() && !_cm->has_overflown());
2343   }
2344 };
2345 
2346 // Implementation of AbstractRefProcTaskExecutor for parallel
2347 // reference processing at the end of G1 concurrent marking
2348 
2349 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2350 private:
2351   G1CollectedHeap* _g1h;
2352   ConcurrentMark*  _cm;
2353   WorkGang*        _workers;
2354   int              _active_workers;
2355 
2356 public:
2357   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2358                         ConcurrentMark* cm,
2359                         WorkGang* workers,
2360                         int n_workers) :
2361     _g1h(g1h), _cm(cm),
2362     _workers(workers), _active_workers(n_workers) { }
2363 
2364   // Executes the given task using concurrent marking worker threads.
2365   virtual void execute(ProcessTask& task);
2366   virtual void execute(EnqueueTask& task);
2367 };
2368 
2369 class G1CMRefProcTaskProxy: public AbstractGangTask {
2370   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2371   ProcessTask&     _proc_task;
2372   G1CollectedHeap* _g1h;
2373   ConcurrentMark*  _cm;
2374 
2375 public:
2376   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2377                      G1CollectedHeap* g1h,
2378                      ConcurrentMark* cm) :
2379     AbstractGangTask("Process reference objects in parallel"),
2380     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2381     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2382     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2383   }
2384 
2385   virtual void work(uint worker_id) {
2386     CMTask* task = _cm->task(worker_id);
2387     G1CMIsAliveClosure g1_is_alive(_g1h);
2388     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2389     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2390 
2391     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2392   }
2393 };
2394 
2395 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2396   assert(_workers != NULL, "Need parallel worker threads.");
2397   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2398 
2399   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2400 
2401   // We need to reset the concurrency level before each
2402   // proxy task execution, so that the termination protocol
2403   // and overflow handling in CMTask::do_marking_step() knows
2404   // how many workers to wait for.
2405   _cm->set_concurrency(_active_workers);
2406   _g1h->set_par_threads(_active_workers);
2407   _workers->run_task(&proc_task_proxy);
2408   _g1h->set_par_threads(0);
2409 }
2410 
2411 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2412   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2413   EnqueueTask& _enq_task;
2414 
2415 public:
2416   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2417     AbstractGangTask("Enqueue reference objects in parallel"),
2418     _enq_task(enq_task) { }
2419 
2420   virtual void work(uint worker_id) {
2421     _enq_task.work(worker_id);
2422   }
2423 };
2424 
2425 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2426   assert(_workers != NULL, "Need parallel worker threads.");
2427   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2428 
2429   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2430 
2431   // Not strictly necessary but...
2432   //
2433   // We need to reset the concurrency level before each
2434   // proxy task execution, so that the termination protocol
2435   // and overflow handling in CMTask::do_marking_step() knows
2436   // how many workers to wait for.
2437   _cm->set_concurrency(_active_workers);
2438   _g1h->set_par_threads(_active_workers);
2439   _workers->run_task(&enq_task_proxy);
2440   _g1h->set_par_threads(0);
2441 }
2442 
2443 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2444   if (has_overflown()) {
2445     // Skip processing the discovered references if we have
2446     // overflown the global marking stack. Reference objects
2447     // only get discovered once so it is OK to not
2448     // de-populate the discovered reference lists. We could have,
2449     // but the only benefit would be that, when marking restarts,
2450     // fewer reference objects are discovered.
2451     return;
2452   }
2453 
2454   ResourceMark rm;
2455   HandleMark   hm;
2456 
2457   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2458 
2459   // Is alive closure.
2460   G1CMIsAliveClosure g1_is_alive(g1h);
2461 
2462   // Inner scope to exclude the cleaning of the string and symbol
2463   // tables from the displayed time.
2464   {
2465     if (G1Log::finer()) {
2466       gclog_or_tty->put(' ');
2467     }
2468     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
2469 
2470     ReferenceProcessor* rp = g1h->ref_processor_cm();
2471 
2472     // See the comment in G1CollectedHeap::ref_processing_init()
2473     // about how reference processing currently works in G1.
2474 
2475     // Set the soft reference policy
2476     rp->setup_policy(clear_all_soft_refs);
2477     assert(_markStack.isEmpty(), "mark stack should be empty");
2478 
2479     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2480     // in serial reference processing. Note these closures are also
2481     // used for serially processing (by the current thread) the
2482     // JNI references during parallel reference processing.
2483     //
2484     // These closures do not need to synchronize with the worker
2485     // threads involved in parallel reference processing as these
2486     // instances are executed serially by the current thread (e.g.
2487     // reference processing is not multi-threaded and is thus
2488     // performed by the current thread instead of a gang worker).
2489     //
2490     // The gang tasks involved in parallel reference processing create
2491     // their own instances of these closures, which do their own
2492     // synchronization among themselves.
2493     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2494     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2495 
2496     // We need at least one active thread. If reference processing
2497     // is not multi-threaded we use the current (VMThread) thread,
2498     // otherwise we use the work gang from the G1CollectedHeap and
2499     // we utilize all the worker threads we can.
2500     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2501     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2502     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2503 
2504     // Parallel processing task executor.
2505     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2506                                               g1h->workers(), active_workers);
2507     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2508 
2509     // Set the concurrency level. The phase was already set prior to
2510     // executing the remark task.
2511     set_concurrency(active_workers);
2512 
2513     // Set the degree of MT processing here.  If the discovery was done MT,
2514     // the number of threads involved during discovery could differ from
2515     // the number of active workers.  This is OK as long as the discovered
2516     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2517     rp->set_active_mt_degree(active_workers);
2518 
2519     // Process the weak references.
2520     const ReferenceProcessorStats& stats =
2521         rp->process_discovered_references(&g1_is_alive,
2522                                           &g1_keep_alive,
2523                                           &g1_drain_mark_stack,
2524                                           executor,
2525                                           g1h->gc_timer_cm(),
2526                                           concurrent_gc_id());
2527     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2528 
2529     // The do_oop work routines of the keep_alive and drain_marking_stack
2530     // oop closures will set the has_overflown flag if we overflow the
2531     // global marking stack.
2532 
2533     assert(_markStack.overflow() || _markStack.isEmpty(),
2534             "mark stack should be empty (unless it overflowed)");
2535 
2536     if (_markStack.overflow()) {
2537       // This should have been done already when we tried to push an
2538       // entry on to the global mark stack. But let's do it again.
2539       set_has_overflown();
2540     }
2541 
2542     assert(rp->num_q() == active_workers, "why not");
2543 
2544     rp->enqueue_discovered_references(executor);
2545 
2546     rp->verify_no_references_recorded();
2547     assert(!rp->discovery_enabled(), "Post condition");
2548   }
2549 
2550   if (has_overflown()) {
2551     // We can not trust g1_is_alive if the marking stack overflowed
2552     return;
2553   }
2554 
2555   g1h->unlink_string_and_symbol_table(&g1_is_alive,
2556                                       /* process_strings */ false, // currently strings are always roots
2557                                       /* process_symbols */ true);
2558 }
2559 
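// Swap the roles of the two marking bitmaps: the "next" bitmap, which now
// holds the results of the marking that just completed, becomes the "prev"
// bitmap, and the old "prev" bitmap becomes the "next" bitmap that will be
// cleared and reused by the following marking cycle.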
2560 void ConcurrentMark::swapMarkBitMaps() {
2561   CMBitMapRO* temp = _prevMarkBitMap;
2562   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2563   _nextMarkBitMap  = (CMBitMap*)  temp;
2564 }
2565 
2566 class CMRemarkTask: public AbstractGangTask {
2567 private:
2568   ConcurrentMark* _cm;
2569   bool            _is_serial;
2570 public:
2571   void work(uint worker_id) {
2572     // Since all available tasks are actually started, we should
2573     // only proceed if we're supposed to be active.
2574     if (worker_id < _cm->active_tasks()) {
2575       CMTask* task = _cm->task(worker_id);
2576       task->record_start_time();
2577       do {
2578         task->do_marking_step(1000000000.0 /* something very large */,
2579                               true         /* do_termination       */,
2580                               _is_serial);
2581       } while (task->has_aborted() && !_cm->has_overflown());
2582       // If we overflow, then we do not want to restart. We instead
2583       // want to abort remark and do concurrent marking again.
2584       task->record_end_time();
2585     }
2586   }
2587 
2588   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2589     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2590     _cm->terminator()->reset_for_reuse(active_workers);
2591   }
2592 };
2593 
2594 void ConcurrentMark::checkpointRootsFinalWork() {
2595   ResourceMark rm;
2596   HandleMark   hm;
2597   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2598 
2599   g1h->ensure_parsability(false);
2600 
2601   if (G1CollectedHeap::use_parallel_gc_threads()) {
2602     G1CollectedHeap::StrongRootsScope srs(g1h);
2603     // this is remark, so we'll use up all active threads
2604     uint active_workers = g1h->workers()->active_workers();
2605     if (active_workers == 0) {
2606       assert(active_workers > 0, "Should have been set earlier");
2607       active_workers = (uint) ParallelGCThreads;
2608       g1h->workers()->set_active_workers(active_workers);
2609     }
2610     set_concurrency_and_phase(active_workers, false /* concurrent */);
2611     // Leave _parallel_marking_threads at its
2612     // value originally calculated in the ConcurrentMark
2613     // constructor and pass values of the active workers
2614     // through the gang in the task.
2615 
2616     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2617     // We will start all available threads, even if we decide that the
2618     // active_workers will be fewer. The extra ones will just bail out
2619     // immediately.
2620     g1h->set_par_threads(active_workers);
2621     g1h->workers()->run_task(&remarkTask);
2622     g1h->set_par_threads(0);
2623   } else {
2624     G1CollectedHeap::StrongRootsScope srs(g1h);
2625     uint active_workers = 1;
2626     set_concurrency_and_phase(active_workers, false /* concurrent */);
2627 
2628     // Note - if there's no work gang then the VMThread will be
2629     // the thread to execute the remark - serially. We have
2630     // to pass true for the is_serial parameter so that
2631     // CMTask::do_marking_step() doesn't enter the sync
2632     // barriers in the event of an overflow. Doing so will
2633     // cause an assert that the current thread is not a
2634     // concurrent GC thread.
2635     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2636     remarkTask.work(0);
2637   }
2638   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2639   guarantee(has_overflown() ||
2640             satb_mq_set.completed_buffers_num() == 0,
2641             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2642                     BOOL_TO_STR(has_overflown()),
2643                     satb_mq_set.completed_buffers_num()));
2644 
2645   print_stats();
2646 }
2647 
2648 #ifndef PRODUCT
2649 
2650 class PrintReachableOopClosure: public OopClosure {
2651 private:
2652   G1CollectedHeap* _g1h;
2653   outputStream*    _out;
2654   VerifyOption     _vo;
2655   bool             _all;
2656 
2657 public:
2658   PrintReachableOopClosure(outputStream* out,
2659                            VerifyOption  vo,
2660                            bool          all) :
2661     _g1h(G1CollectedHeap::heap()),
2662     _out(out), _vo(vo), _all(all) { }
2663 
2664   void do_oop(narrowOop* p) { do_oop_work(p); }
2665   void do_oop(      oop* p) { do_oop_work(p); }
2666 
2667   template <class T> void do_oop_work(T* p) {
2668     oop         obj = oopDesc::load_decode_heap_oop(p);
2669     const char* str = NULL;
2670     const char* str2 = "";
2671 
2672     if (obj == NULL) {
2673       str = "";
2674     } else if (!_g1h->is_in_g1_reserved(obj)) {
2675       str = " O";
2676     } else {
2677       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2678       guarantee(hr != NULL, "invariant");
2679       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2680       bool marked = _g1h->is_marked(obj, _vo);
2681 
2682       if (over_tams) {
2683         str = " >";
2684         if (marked) {
2685           str2 = " AND MARKED";
2686         }
2687       } else if (marked) {
2688         str = " M";
2689       } else {
2690         str = " NOT";
2691       }
2692     }
2693 
2694     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2695                    p2i(p), p2i((void*) obj), str, str2);
2696   }
2697 };
2698 
2699 class PrintReachableObjectClosure : public ObjectClosure {
2700 private:
2701   G1CollectedHeap* _g1h;
2702   outputStream*    _out;
2703   VerifyOption     _vo;
2704   bool             _all;
2705   HeapRegion*      _hr;
2706 
2707 public:
2708   PrintReachableObjectClosure(outputStream* out,
2709                               VerifyOption  vo,
2710                               bool          all,
2711                               HeapRegion*   hr) :
2712     _g1h(G1CollectedHeap::heap()),
2713     _out(out), _vo(vo), _all(all), _hr(hr) { }
2714 
2715   void do_object(oop o) {
2716     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2717     bool marked = _g1h->is_marked(o, _vo);
2718     bool print_it = _all || over_tams || marked;
2719 
2720     if (print_it) {
2721       _out->print_cr(" "PTR_FORMAT"%s",
2722                      p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2723       PrintReachableOopClosure oopCl(_out, _vo, _all);
2724       o->oop_iterate_no_header(&oopCl);
2725     }
2726   }
2727 };
2728 
2729 class PrintReachableRegionClosure : public HeapRegionClosure {
2730 private:
2731   G1CollectedHeap* _g1h;
2732   outputStream*    _out;
2733   VerifyOption     _vo;
2734   bool             _all;
2735 
2736 public:
2737   bool doHeapRegion(HeapRegion* hr) {
2738     HeapWord* b = hr->bottom();
2739     HeapWord* e = hr->end();
2740     HeapWord* t = hr->top();
2741     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2742     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2743                    "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2744     _out->cr();
2745 
2746     HeapWord* from = b;
2747     HeapWord* to   = t;
2748 
2749     if (to > from) {
2750       _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2751       _out->cr();
2752       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2753       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2754       _out->cr();
2755     }
2756 
2757     return false;
2758   }
2759 
2760   PrintReachableRegionClosure(outputStream* out,
2761                               VerifyOption  vo,
2762                               bool          all) :
2763     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2764 };
2765 
2766 void ConcurrentMark::print_reachable(const char* str,
2767                                      VerifyOption vo,
2768                                      bool all) {
2769   gclog_or_tty->cr();
2770   gclog_or_tty->print_cr("== Doing heap dump... ");
2771 
2772   if (G1PrintReachableBaseFile == NULL) {
2773     gclog_or_tty->print_cr("  #### error: no base file defined");
2774     return;
2775   }
2776 
2777   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2778       (JVM_MAXPATHLEN - 1)) {
2779     gclog_or_tty->print_cr("  #### error: file name too long");
2780     return;
2781   }
2782 
2783   char file_name[JVM_MAXPATHLEN];
2784   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2785   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2786 
2787   fileStream fout(file_name);
2788   if (!fout.is_open()) {
2789     gclog_or_tty->print_cr("  #### error: could not open file");
2790     return;
2791   }
2792 
2793   outputStream* out = &fout;
2794   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2795   out->cr();
2796 
2797   out->print_cr("--- ITERATING OVER REGIONS");
2798   out->cr();
2799   PrintReachableRegionClosure rcl(out, vo, all);
2800   _g1h->heap_region_iterate(&rcl);
2801   out->cr();
2802 
2803   gclog_or_tty->print_cr("  done");
2804   gclog_or_tty->flush();
2805 }
2806 
2807 #endif // PRODUCT
2808 
2809 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2810   // Note we are overriding the read-only view of the prev map here, via
2811   // the cast.
2812   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2813 }
2814 
2815 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2816   _nextMarkBitMap->clearRange(mr);
2817 }
2818 
2819 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2820   clearRangePrevBitmap(mr);
2821   clearRangeNextBitmap(mr);
2822 }
2823 
2824 HeapRegion*
2825 ConcurrentMark::claim_region(uint worker_id) {
2826   // "checkpoint" the finger
2827   HeapWord* finger = _finger;
2828 
2829   // _heap_end will not change underneath our feet; it only changes at
2830   // yield points.
2831   while (finger < _heap_end) {
2832     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2833 
2834     // Note on how this code handles humongous regions. In the
2835     // normal case the finger will reach the start of a "starts
2836     // humongous" (SH) region. Its end will either be the end of the
2837     // last "continues humongous" (CH) region in the sequence, or the
2838     // standard end of the SH region (if the SH is the only region in
2839     // the sequence). That way claim_region() will skip over the CH
2840     // regions. However, there is a subtle race between a CM thread
2841     // executing this method and a mutator thread doing a humongous
2842     // object allocation. The two are not mutually exclusive as the CM
2843     // thread does not need to hold the Heap_lock when it gets
2844     // here. So there is a chance that claim_region() will come across
2845     // a free region that's in the process of becoming a SH or a CH
2846     // region. In the former case, it will either
2847     //   a) Miss the update to the region's end, in which case it will
2848     //      visit every subsequent CH region, will find their bitmaps
2849     //      empty, and do nothing, or
2850     //   b) Will observe the update of the region's end (in which case
2851     //      it will skip the subsequent CH regions).
2852     // If it comes across a region that suddenly becomes CH, the
2853     // scenario will be similar to b). So, the race between
2854     // claim_region() and a humongous object allocation might force us
2855     // to do a bit of unnecessary work (due to some unnecessary bitmap
2856     // iterations) but it should not introduce any correctness issues.
2857     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2858     HeapWord*   bottom        = curr_region->bottom();
2859     HeapWord*   end           = curr_region->end();
2860     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2861 
2862     if (verbose_low()) {
2863       gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2864                              "["PTR_FORMAT", "PTR_FORMAT"), "
2865                              "limit = "PTR_FORMAT,
2866                              worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2867     }
2868 
2869     // Is the gap between reading the finger and doing the CAS too long?
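         // Try to claim the region by atomically bumping the global finger
         // from 'finger' to 'end'; if the CAS succeeds, this worker owns
         // [finger, end) and no other task will scan it.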
2870     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2871     if (res == finger) {
2872       // we succeeded
2873 
2874       // notice that _finger == end cannot be guaranteed here since
2875       // someone else might have moved the finger even further
2876       assert(_finger >= end, "the finger should have moved forward");
2877 
2878       if (verbose_low()) {
2879         gclog_or_tty->print_cr("[%u] we were successful with region = "
2880                                PTR_FORMAT, worker_id, p2i(curr_region));
2881       }
2882 
2883       if (limit > bottom) {
2884         if (verbose_low()) {
2885           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2886                                  "returning it ", worker_id, p2i(curr_region));
2887         }
2888         return curr_region;
2889       } else {
2890         assert(limit == bottom,
2891                "the region limit should be at bottom");
2892         if (verbose_low()) {
2893           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2894                                  "returning NULL", worker_id, p2i(curr_region));
2895         }
2896         // we return NULL and the caller should try calling
2897         // claim_region() again.
2898         return NULL;
2899       }
2900     } else {
2901       assert(_finger > finger, "the finger should have moved forward");
2902       if (verbose_low()) {
2903         gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2904                                "global finger = "PTR_FORMAT", "
2905                                "our finger = "PTR_FORMAT,
2906                                worker_id, p2i(_finger), p2i(finger));
2907       }
2908 
2909       // read it again
2910       finger = _finger;
2911     }
2912   }
2913 
2914   return NULL;
2915 }
2916 
2917 #ifndef PRODUCT
2918 enum VerifyNoCSetOopsPhase {
2919   VerifyNoCSetOopsStack,
2920   VerifyNoCSetOopsQueues,
2921   VerifyNoCSetOopsSATBCompleted,
2922   VerifyNoCSetOopsSATBThread
2923 };
2924 
2925 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2926 private:
2927   G1CollectedHeap* _g1h;
2928   VerifyNoCSetOopsPhase _phase;
2929   int _info;
2930 
2931   const char* phase_str() {
2932     switch (_phase) {
2933     case VerifyNoCSetOopsStack:         return "Stack";
2934     case VerifyNoCSetOopsQueues:        return "Queue";
2935     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2936     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2937     default:                            ShouldNotReachHere();
2938     }
2939     return NULL;
2940   }
2941 
2942   void do_object_work(oop obj) {
2943     guarantee(!_g1h->obj_in_cs(obj),
2944               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2945                       p2i((void*) obj), phase_str(), _info));
2946   }
2947 
2948 public:
2949   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2950 
2951   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2952     _phase = phase;
2953     _info = info;
2954   }
2955 
2956   virtual void do_oop(oop* p) {
2957     oop obj = oopDesc::load_decode_heap_oop(p);
2958     do_object_work(obj);
2959   }
2960 
2961   virtual void do_oop(narrowOop* p) {
2962     // We should not come across narrow oops while scanning marking
2963     // stacks and SATB buffers.
2964     ShouldNotReachHere();
2965   }
2966 
2967   virtual void do_object(oop obj) {
2968     do_object_work(obj);
2969   }
2970 };
2971 
2972 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2973                                          bool verify_enqueued_buffers,
2974                                          bool verify_thread_buffers,
2975                                          bool verify_fingers) {
2976   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2977   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2978     return;
2979   }
2980 
2981   VerifyNoCSetOopsClosure cl;
2982 
2983   if (verify_stacks) {
2984     // Verify entries on the global mark stack
2985     cl.set_phase(VerifyNoCSetOopsStack);
2986     _markStack.oops_do(&cl);
2987 
2988     // Verify entries on the task queues
2989     for (uint i = 0; i < _max_worker_id; i += 1) {
2990       cl.set_phase(VerifyNoCSetOopsQueues, i);
2991       CMTaskQueue* queue = _task_queues->queue(i);
2992       queue->oops_do(&cl);
2993     }
2994   }
2995 
2996   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2997 
2998   // Verify entries on the enqueued SATB buffers
2999   if (verify_enqueued_buffers) {
3000     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3001     satb_qs.iterate_completed_buffers_read_only(&cl);
3002   }
3003 
3004   // Verify entries on the per-thread SATB buffers
3005   if (verify_thread_buffers) {
3006     cl.set_phase(VerifyNoCSetOopsSATBThread);
3007     satb_qs.iterate_thread_buffers_read_only(&cl);
3008   }
3009 
3010   if (verify_fingers) {
3011     // Verify the global finger
3012     HeapWord* global_finger = finger();
3013     if (global_finger != NULL && global_finger < _heap_end) {
3014       // The global finger always points to a heap region boundary. We
3015       // use heap_region_containing_raw() to get the containing region
3016       // given that the global finger could be pointing to a free region
3017       // which subsequently becomes a continues humongous region. If that
3018       // happened, heap_region_containing() would return the corresponding
3019       // starts humongous region, whose bottom differs from the global
3020       // finger, and the check below would no longer hold.
3021       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3022       guarantee(global_finger == global_hr->bottom(),
3023                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3024                         p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3025     }
3026 
3027     // Verify the task fingers
3028     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3029     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3030       CMTask* task = _tasks[i];
3031       HeapWord* task_finger = task->finger();
3032       if (task_finger != NULL && task_finger < _heap_end) {
3033         // See above note on the global finger verification.
3034         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3035         guarantee(task_finger == task_hr->bottom() ||
3036                   !task_hr->in_collection_set(),
3037                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3038                           p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3039       }
3040     }
3041   }
3042 }
3043 #endif // PRODUCT
3044 
3045 // Aggregate the counting data that was constructed concurrently
3046 // with marking.
3047 class AggregateCountDataHRClosure: public HeapRegionClosure {
3048   G1CollectedHeap* _g1h;
3049   ConcurrentMark* _cm;
3050   CardTableModRefBS* _ct_bs;
3051   BitMap* _cm_card_bm;
3052   uint _max_worker_id;
3053 
3054  public:
3055   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3056                               BitMap* cm_card_bm,
3057                               uint max_worker_id) :
3058     _g1h(g1h), _cm(g1h->concurrent_mark()),
3059     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3060     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3061 
3062   bool doHeapRegion(HeapRegion* hr) {
3063     if (hr->continuesHumongous()) {
3064       // We will ignore these here and process them when their
3065       // associated "starts humongous" region is processed.
3066       // Note that we cannot rely on their associated
3067       // "starts humongous" region to have its bit set to 1
3068       // since, due to the region chunking in the parallel region
3069       // iteration, a "continues humongous" region might be visited
3070       // before its associated "starts humongous".
3071       return false;
3072     }
3073 
3074     HeapWord* start = hr->bottom();
3075     HeapWord* limit = hr->next_top_at_mark_start();
3076     HeapWord* end = hr->end();
3077 
3078     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3079            err_msg("Preconditions not met - "
3080                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3081                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
3082                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3083 
3084     assert(hr->next_marked_bytes() == 0, "Precondition");
3085 
3086     if (start == limit) {
3087       // NTAMS of this region has not been set so nothing to do.
3088       return false;
3089     }
3090 
3091     // 'start' should be in the heap.
3092     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3093     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3094     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3095 
3096     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3097     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3098     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3099 
3100     // If ntams is not card aligned then we bump the card bitmap index
3101     // for limit so that we get all the cards spanned by
3102     // the object ending at ntams.
3103     // Note: if this is the last region in the heap then ntams
3104     // could actually be just beyond the end of the heap;
3105     // limit_idx will then correspond to a (non-existent) card
3106     // that is also outside the heap.
3107     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3108       limit_idx += 1;
3109     }
3110 
3111     assert(limit_idx <= end_idx, "or else use atomics");
3112 
3113     // Aggregate the "stripe" in the count data associated with hr.
3114     uint hrs_index = hr->hrs_index();
3115     size_t marked_bytes = 0;
3116 
3117     for (uint i = 0; i < _max_worker_id; i += 1) {
3118       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3119       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3120 
3121       // Fetch the marked_bytes in this region for task i and
3122       // add it to the running total for this region.
3123       marked_bytes += marked_bytes_array[hrs_index];
3124 
3125       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3126       // into the global card bitmap.
3127       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3128 
3129       while (scan_idx < limit_idx) {
3130         assert(task_card_bm->at(scan_idx) == true, "should be");
3131         _cm_card_bm->set_bit(scan_idx);
3132         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3133 
3134         // BitMap::get_next_one_offset() can handle the case when
3135         // its left_offset parameter is greater than its right_offset
3136         // parameter. It does, however, have an early exit if
3137         // left_offset == right_offset. So let's limit the value
3138         // passed in for left offset here.
3139         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3140         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3141       }
3142     }
3143 
3144     // Update the marked bytes for this region.
3145     hr->add_to_marked_bytes(marked_bytes);
3146 
3147     // Next heap region
3148     return false;
3149   }
3150 };
3151 
3152 class G1AggregateCountDataTask: public AbstractGangTask {
3153 protected:
3154   G1CollectedHeap* _g1h;
3155   ConcurrentMark* _cm;
3156   BitMap* _cm_card_bm;
3157   uint _max_worker_id;
3158   int _active_workers;
3159 
3160 public:
3161   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3162                            ConcurrentMark* cm,
3163                            BitMap* cm_card_bm,
3164                            uint max_worker_id,
3165                            int n_workers) :
3166     AbstractGangTask("Count Aggregation"),
3167     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3168     _max_worker_id(max_worker_id),
3169     _active_workers(n_workers) { }
3170 
3171   void work(uint worker_id) {
3172     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3173 
3174     if (G1CollectedHeap::use_parallel_gc_threads()) {
3175       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3176                                             _active_workers,
3177                                             HeapRegion::AggregateCountClaimValue);
3178     } else {
3179       _g1h->heap_region_iterate(&cl);
3180     }
3181   }
3182 };
3183 
3184 
3185 void ConcurrentMark::aggregate_count_data() {
3186   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3187                         _g1h->workers()->active_workers() :
3188                         1);
3189 
3190   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3191                                            _max_worker_id, n_workers);
3192 
3193   if (G1CollectedHeap::use_parallel_gc_threads()) {
3194     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3195            "sanity check");
3196     _g1h->set_par_threads(n_workers);
3197     _g1h->workers()->run_task(&g1_par_agg_task);
3198     _g1h->set_par_threads(0);
3199 
3200     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3201            "sanity check");
3202     _g1h->reset_heap_region_claim_values();
3203   } else {
3204     g1_par_agg_task.work(0);
3205   }
3206 }
3207 
3208 // Clear the per-worker arrays used to store the per-region counting data
3209 void ConcurrentMark::clear_all_count_data() {
3210   // Clear the global card bitmap - it will be filled during
3211   // liveness count aggregation (during remark) and the
3212   // final counting task.
3213   _card_bm.clear();
3214 
3215   // Clear the global region bitmap - it will be filled as part
3216   // of the final counting task.
3217   _region_bm.clear();
3218 
3219   uint max_regions = _g1h->max_regions();
3220   assert(_max_worker_id > 0, "uninitialized");
3221 
3222   for (uint i = 0; i < _max_worker_id; i += 1) {
3223     BitMap* task_card_bm = count_card_bitmap_for(i);
3224     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3225 
3226     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3227     assert(marked_bytes_array != NULL, "uninitialized");
3228 
3229     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3230     task_card_bm->clear();
3231   }
3232 }
3233 
3234 void ConcurrentMark::print_stats() {
3235   if (verbose_stats()) {
3236     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3237     for (size_t i = 0; i < _active_tasks; ++i) {
3238       _tasks[i]->print_stats();
3239       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3240     }
3241   }
3242 }
3243 
3244 // abandon current marking iteration due to a Full GC
3245 void ConcurrentMark::abort() {
3246   // Clear all marks to force marking thread to do nothing
3247   _nextMarkBitMap->clearAll();
3248   // Clear the liveness counting data
3249   clear_all_count_data();
3250   // Empty mark stack
3251   reset_marking_state();
3252   for (uint i = 0; i < _max_worker_id; ++i) {
3253     _tasks[i]->clear_region_fields();
3254   }
3255   _first_overflow_barrier_sync.abort();
3256   _second_overflow_barrier_sync.abort();
3257   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3258   if (!gc_id.is_undefined()) {
3259     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3260     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3261     _aborted_gc_id = gc_id;
3262    }
3263   _has_aborted = true;
3264 
3265   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3266   satb_mq_set.abandon_partial_marking();
3267   // This can be called either during or outside marking, we'll read
3268   // the expected_active value from the SATB queue set.
3269   satb_mq_set.set_active_all_threads(
3270                                  false, /* new active value */
3271                                  satb_mq_set.is_active() /* expected_active */);
3272 
3273   _g1h->trace_heap_after_concurrent_cycle();
3274   _g1h->register_concurrent_cycle_end();
3275 }
3276 
3277 const GCId& ConcurrentMark::concurrent_gc_id() {
3278   if (has_aborted()) {
3279     return _aborted_gc_id;
3280   }
3281   return _g1h->gc_tracer_cm()->gc_id();
3282 }
3283 
3284 static void print_ms_time_info(const char* prefix, const char* name,
3285                                NumberSeq& ns) {
3286   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3287                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3288   if (ns.num() > 0) {
3289     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3290                            prefix, ns.sd(), ns.maximum());
3291   }
3292 }
3293 
3294 void ConcurrentMark::print_summary_info() {
3295   gclog_or_tty->print_cr(" Concurrent marking:");
3296   print_ms_time_info("  ", "init marks", _init_times);
3297   print_ms_time_info("  ", "remarks", _remark_times);
3298   {
3299     print_ms_time_info("     ", "final marks", _remark_mark_times);
3300     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3301 
3302   }
3303   print_ms_time_info("  ", "cleanups", _cleanup_times);
3304   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3305                          _total_counting_time,
3306                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3307                           (double)_cleanup_times.num()
3308                          : 0.0));
3309   if (G1ScrubRemSets) {
3310     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3311                            _total_rs_scrub_time,
3312                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3313                             (double)_cleanup_times.num()
3314                            : 0.0));
3315   }
3316   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3317                          (_init_times.sum() + _remark_times.sum() +
3318                           _cleanup_times.sum())/1000.0);
3319   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3320                 "(%8.2f s marking).",
3321                 cmThread()->vtime_accum(),
3322                 cmThread()->vtime_mark_accum());
3323 }
3324 
3325 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3326   if (use_parallel_marking_threads()) {
3327     _parallel_workers->print_worker_threads_on(st);
3328   }
3329 }
3330 
3331 void ConcurrentMark::print_on_error(outputStream* st) const {
3332   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3333       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3334   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3335   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3336 }
3337 
3338 // We take a break if someone is trying to stop the world.
3339 bool ConcurrentMark::do_yield_check(uint worker_id) {
3340   if (SuspendibleThreadSet::should_yield()) {
3341     if (worker_id == 0) {
3342       _g1h->g1_policy()->record_concurrent_pause();
3343     }
3344     SuspendibleThreadSet::yield();
3345     return true;
3346   } else {
3347     return false;
3348   }
3349 }
3350 
3351 bool ConcurrentMark::containing_card_is_marked(void* p) {
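       // pointer_delta() with a scale of 1 gives the byte offset of p from
       // the start of the reserved heap; shifting by card_shift turns that
       // offset into an index into the global liveness card bitmap.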
3352   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3353   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3354 }
3355 
3356 bool ConcurrentMark::containing_cards_are_marked(void* start,
3357                                                  void* last) {
3358   return containing_card_is_marked(start) &&
3359          containing_card_is_marked(last);
3360 }
3361 
3362 #ifndef PRODUCT
3363 // for debugging purposes
3364 void ConcurrentMark::print_finger() {
3365   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3366                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3367   for (uint i = 0; i < _max_worker_id; ++i) {
3368     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3369   }
3370   gclog_or_tty->cr();
3371 }
3372 #endif
3373 
3374 void CMTask::scan_object(oop obj) {
3375   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3376 
3377   if (_cm->verbose_high()) {
3378     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3379                            _worker_id, p2i((void*) obj));
3380   }
3381 
3382   size_t obj_size = obj->size();
3383   _words_scanned += obj_size;
3384 
3385   obj->oop_iterate(_cm_oop_closure);
3386   statsOnly( ++_objs_scanned );
3387   check_limits();
3388 }
3389 
3390 // Closure for iteration over bitmaps
3391 class CMBitMapClosure : public BitMapClosure {
3392 private:
3393   // the bitmap that is being iterated over
3394   CMBitMap*                   _nextMarkBitMap;
3395   ConcurrentMark*             _cm;
3396   CMTask*                     _task;
3397 
3398 public:
3399   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3400     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3401 
3402   bool do_bit(size_t offset) {
3403     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3404     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3405     assert( addr < _cm->finger(), "invariant");
3406 
3407     statsOnly( _task->increase_objs_found_on_bitmap() );
3408     assert(addr >= _task->finger(), "invariant");
3409 
3410     // We move that task's local finger along.
3411     _task->move_finger_to(addr);
3412 
3413     _task->scan_object(oop(addr));
3414     // we only partially drain the local queue and global stack
3415     _task->drain_local_queue(true);
3416     _task->drain_global_stack(true);
3417 
3418     // if the has_aborted flag has been raised, we need to bail out of
3419     // the iteration
3420     return !_task->has_aborted();
3421   }
3422 };
3423 
3424 // Closure for iterating over objects, currently only used for
3425 // processing SATB buffers.
3426 class CMObjectClosure : public ObjectClosure {
3427 private:
3428   CMTask* _task;
3429 
3430 public:
3431   void do_object(oop obj) {
3432     _task->deal_with_reference(obj);
3433   }
3434 
3435   CMObjectClosure(CMTask* task) : _task(task) { }
3436 };
3437 
3438 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3439                                ConcurrentMark* cm,
3440                                CMTask* task)
3441   : _g1h(g1h), _cm(cm), _task(task) {
3442   assert(_ref_processor == NULL, "should be initialized to NULL");
3443 
3444   if (G1UseConcMarkReferenceProcessing) {
3445     _ref_processor = g1h->ref_processor_cm();
3446     assert(_ref_processor != NULL, "should not be NULL");
3447   }
3448 }
3449 
3450 void CMTask::setup_for_region(HeapRegion* hr) {
3451   // Separated the asserts so that we know which one fires.
3452   assert(hr != NULL,
3453         "claim_region() should have filtered out continues humongous regions");
3454   assert(!hr->continuesHumongous(),
3455         "claim_region() should have filtered out continues humongous regions");
3456 
3457   if (_cm->verbose_low()) {
3458     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3459                            _worker_id, p2i(hr));
3460   }
3461 
3462   _curr_region  = hr;
3463   _finger       = hr->bottom();
3464   update_region_limit();
3465 }
3466 
3467 void CMTask::update_region_limit() {
3468   HeapRegion* hr            = _curr_region;
3469   HeapWord* bottom          = hr->bottom();
3470   HeapWord* limit           = hr->next_top_at_mark_start();
3471 
3472   if (limit == bottom) {
3473     if (_cm->verbose_low()) {
3474       gclog_or_tty->print_cr("[%u] found an empty region "
3475                              "["PTR_FORMAT", "PTR_FORMAT")",
3476                              _worker_id, p2i(bottom), p2i(limit));
3477     }
3478     // The region was collected underneath our feet.
3479     // We set the finger to bottom to ensure that the bitmap
3480     // iteration that will follow this will not do anything.
3481     // (this is not a condition that holds when we set the region up,
3482     // as the region is not supposed to be empty in the first place)
3483     _finger = bottom;
3484   } else if (limit >= _region_limit) {
3485     assert(limit >= _finger, "peace of mind");
3486   } else {
3487     assert(limit < _region_limit, "only way to get here");
3488     // This can happen under some pretty unusual circumstances.  An
3489     // evacuation pause empties the region underneath our feet (NTAMS
3490     // at bottom). We then do some allocation in the region (NTAMS
3491     // stays at bottom), followed by the region being used as a GC
3492     // alloc region (NTAMS will move to top() and the objects
3493     // originally below it will be grayed). All objects now marked in
3494     // the region are explicitly grayed, if below the global finger,
3495     // and in fact we do not need to scan anything else. So, we simply
3496     // set _finger to be limit to ensure that the bitmap iteration
3497     // doesn't do anything.
3498     _finger = limit;
3499   }
3500 
3501   _region_limit = limit;
3502 }
3503 
3504 void CMTask::giveup_current_region() {
3505   assert(_curr_region != NULL, "invariant");
3506   if (_cm->verbose_low()) {
3507     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3508                            _worker_id, p2i(_curr_region));
3509   }
3510   clear_region_fields();
3511 }
3512 
3513 void CMTask::clear_region_fields() {
3514   // Values for these three fields that indicate that we're not
3515   // holding on to a region.
3516   _curr_region   = NULL;
3517   _finger        = NULL;
3518   _region_limit  = NULL;
3519 }
3520 
3521 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3522   if (cm_oop_closure == NULL) {
3523     assert(_cm_oop_closure != NULL, "invariant");
3524   } else {
3525     assert(_cm_oop_closure == NULL, "invariant");
3526   }
3527   _cm_oop_closure = cm_oop_closure;
3528 }
3529 
3530 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3531   guarantee(nextMarkBitMap != NULL, "invariant");
3532 
3533   if (_cm->verbose_low()) {
3534     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3535   }
3536 
3537   _nextMarkBitMap                = nextMarkBitMap;
3538   clear_region_fields();
3539 
3540   _calls                         = 0;
3541   _elapsed_time_ms               = 0.0;
3542   _termination_time_ms           = 0.0;
3543   _termination_start_time_ms     = 0.0;
3544 
3545 #if _MARKING_STATS_
3546   _local_pushes                  = 0;
3547   _local_pops                    = 0;
3548   _local_max_size                = 0;
3549   _objs_scanned                  = 0;
3550   _global_pushes                 = 0;
3551   _global_pops                   = 0;
3552   _global_max_size               = 0;
3553   _global_transfers_to           = 0;
3554   _global_transfers_from         = 0;
3555   _regions_claimed               = 0;
3556   _objs_found_on_bitmap          = 0;
3557   _satb_buffers_processed        = 0;
3558   _steal_attempts                = 0;
3559   _steals                        = 0;
3560   _aborted                       = 0;
3561   _aborted_overflow              = 0;
3562   _aborted_cm_aborted            = 0;
3563   _aborted_yield                 = 0;
3564   _aborted_timed_out             = 0;
3565   _aborted_satb                  = 0;
3566   _aborted_termination           = 0;
3567 #endif // _MARKING_STATS_
3568 }
3569 
3570 bool CMTask::should_exit_termination() {
3571   regular_clock_call();
3572   // This is called when we are in the termination protocol. We should
3573   // quit if, for some reason, this task wants to abort or the global
3574   // stack is not empty (this means that we can get work from it).
3575   return !_cm->mark_stack_empty() || has_aborted();
3576 }
3577 
3578 void CMTask::reached_limit() {
3579   assert(_words_scanned >= _words_scanned_limit ||
3580          _refs_reached >= _refs_reached_limit ,
3581          "shouldn't have been called otherwise");
3582   regular_clock_call();
3583 }
3584 
3585 void CMTask::regular_clock_call() {
3586   if (has_aborted()) return;
3587 
3588   // First, we need to recalculate the words scanned and refs reached
3589   // limits for the next clock call.
3590   recalculate_limits();
3591 
3592   // During the regular clock call we do the following
3593 
3594   // (1) If an overflow has been flagged, then we abort.
3595   if (_cm->has_overflown()) {
3596     set_has_aborted();
3597     return;
3598   }
3599 
3600   // If we are not concurrent (i.e. we're doing remark) we don't need
3601   // to check anything else. The other steps are only needed during
3602   // the concurrent marking phase.
3603   if (!concurrent()) return;
3604 
3605   // (2) If marking has been aborted for Full GC, then we also abort.
3606   if (_cm->has_aborted()) {
3607     set_has_aborted();
3608     statsOnly( ++_aborted_cm_aborted );
3609     return;
3610   }
3611 
3612   double curr_time_ms = os::elapsedVTime() * 1000.0;
3613 
3614   // (3) If marking stats are enabled, then we update the step history.
3615 #if _MARKING_STATS_
3616   if (_words_scanned >= _words_scanned_limit) {
3617     ++_clock_due_to_scanning;
3618   }
3619   if (_refs_reached >= _refs_reached_limit) {
3620     ++_clock_due_to_marking;
3621   }
3622 
3623   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3624   _interval_start_time_ms = curr_time_ms;
3625   _all_clock_intervals_ms.add(last_interval_ms);
3626 
3627   if (_cm->verbose_medium()) {
3628       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3629                         "scanned = %d%s, refs reached = %d%s",
3630                         _worker_id, last_interval_ms,
3631                         _words_scanned,
3632                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3633                         _refs_reached,
3634                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3635   }
3636 #endif // _MARKING_STATS_
3637 
3638   // (4) We check whether we should yield. If we have to, then we abort.
3639   if (SuspendibleThreadSet::should_yield()) {
3640     // We should yield. To do this we abort the task. The caller is
3641     // responsible for yielding.
3642     set_has_aborted();
3643     statsOnly( ++_aborted_yield );
3644     return;
3645   }
3646 
3647   // (5) We check whether we've reached our time quota. If we have,
3648   // then we abort.
3649   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3650   if (elapsed_time_ms > _time_target_ms) {
3651     set_has_aborted();
3652     _has_timed_out = true;
3653     statsOnly( ++_aborted_timed_out );
3654     return;
3655   }
3656 
3657   // (6) Finally, we check whether there are enough completed SATB
3658   // buffers available for processing. If there are, we abort.
3659   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3660   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3661     if (_cm->verbose_low()) {
3662       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3663                              _worker_id);
3664     }
3665     // we do need to process SATB buffers, so we'll abort and restart
3666     // the marking task to do so
3667     set_has_aborted();
3668     statsOnly( ++_aborted_satb );
3669     return;
3670   }
3671 }
3672 
3673 void CMTask::recalculate_limits() {
3674   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3675   _words_scanned_limit      = _real_words_scanned_limit;
3676 
3677   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3678   _refs_reached_limit       = _real_refs_reached_limit;
3679 }
3680 
3681 void CMTask::decrease_limits() {
3682   // This is called when we believe that we're going to do an infrequent
3683   // operation which will increase the per byte scanned cost (i.e. move
3684   // entries to/from the global stack). It basically tries to decrease the
3685   // scanning limit so that the clock is called earlier.
3686 
3687   if (_cm->verbose_medium()) {
3688     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3689   }
3690 
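       // Pull the limits back so that, relative to the most recent
       // recalculate_limits(), only about a quarter of the normal scanning
       // and refs periods remain before the next regular_clock_call().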
3691   _words_scanned_limit = _real_words_scanned_limit -
3692     3 * words_scanned_period / 4;
3693   _refs_reached_limit  = _real_refs_reached_limit -
3694     3 * refs_reached_period / 4;
3695 }
3696 
3697 void CMTask::move_entries_to_global_stack() {
3698   // local array where we'll store the entries that will be popped
3699   // from the local queue
3700   oop buffer[global_stack_transfer_size];
3701 
3702   int n = 0;
3703   oop obj;
3704   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3705     buffer[n] = obj;
3706     ++n;
3707   }
3708 
3709   if (n > 0) {
3710     // we popped at least one entry from the local queue
3711 
3712     statsOnly( ++_global_transfers_to; _local_pops += n );
3713 
3714     if (!_cm->mark_stack_push(buffer, n)) {
3715       if (_cm->verbose_low()) {
3716         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3717                                _worker_id);
3718       }
3719       set_has_aborted();
3720     } else {
3721       // the transfer was successful
3722 
3723       if (_cm->verbose_medium()) {
3724         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3725                                _worker_id, n);
3726       }
3727       statsOnly( int tmp_size = _cm->mark_stack_size();
3728                  if (tmp_size > _global_max_size) {
3729                    _global_max_size = tmp_size;
3730                  }
3731                  _global_pushes += n );
3732     }
3733   }
3734 
3735   // this operation was quite expensive, so decrease the limits
3736   decrease_limits();
3737 }
3738 
3739 void CMTask::get_entries_from_global_stack() {
3740   // local array where we'll store the entries that will be popped
3741   // from the global stack.
3742   oop buffer[global_stack_transfer_size];
3743   int n;
3744   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3745   assert(n <= global_stack_transfer_size,
3746          "we should not pop more than the given limit");
3747   if (n > 0) {
3748     // yes, we did actually pop at least one entry
3749 
3750     statsOnly( ++_global_transfers_from; _global_pops += n );
3751     if (_cm->verbose_medium()) {
3752       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3753                              _worker_id, n);
3754     }
3755     for (int i = 0; i < n; ++i) {
3756       bool success = _task_queue->push(buffer[i]);
3757       // We only call this when the local queue is empty or under a
3758       // given target limit. So, we do not expect this push to fail.
3759       assert(success, "invariant");
3760     }
3761 
3762     statsOnly( int tmp_size = _task_queue->size();
3763                if (tmp_size > _local_max_size) {
3764                  _local_max_size = tmp_size;
3765                }
3766                _local_pushes += n );
3767   }
3768 
3769   // this operation was quite expensive, so decrease the limits
3770   decrease_limits();
3771 }
3772 
3773 void CMTask::drain_local_queue(bool partially) {
3774   if (has_aborted()) return;
3775 
3776   // Decide what the target size is, depending on whether we're going to
3777   // drain it partially (so that other tasks can steal if they run out
3778   // of things to do) or totally (at the very end).
3779   size_t target_size;
3780   if (partially) {
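         // Drain down to roughly a third of the queue's capacity (capped at
         // GCDrainStackTargetSize) so that some entries remain for other
         // tasks to steal.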
3781     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3782   } else {
3783     target_size = 0;
3784   }
3785 
3786   if (_task_queue->size() > target_size) {
3787     if (_cm->verbose_high()) {
3788       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3789                              _worker_id, target_size);
3790     }
3791 
3792     oop obj;
3793     bool ret = _task_queue->pop_local(obj);
3794     while (ret) {
3795       statsOnly( ++_local_pops );
3796 
3797       if (_cm->verbose_high()) {
3798         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3799                                p2i((void*) obj));
3800       }
3801 
3802       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3803       assert(!_g1h->is_on_master_free_list(
3804                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3805 
3806       scan_object(obj);
3807 
3808       if (_task_queue->size() <= target_size || has_aborted()) {
3809         ret = false;
3810       } else {
3811         ret = _task_queue->pop_local(obj);
3812       }
3813     }
3814 
3815     if (_cm->verbose_high()) {
3816       gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3817                              _worker_id, _task_queue->size());
3818     }
3819   }
3820 }
3821 
3822 void CMTask::drain_global_stack(bool partially) {
3823   if (has_aborted()) return;
3824 
3825   // We have a policy to drain the local queue before we attempt to
3826   // drain the global stack.
3827   assert(partially || _task_queue->size() == 0, "invariant");
3828 
3829   // Decide what the target size is, depending on whether we're going to
3830   // drain it partially (so that other tasks can steal if they run out
3831   // of things to do) or totally (at the very end).  Notice that,
3832   // because we move entries from the global stack in chunks or
3833   // because another task might be doing the same, we might in fact
3834   // drop below the target. But, this is not a problem.
3835   size_t target_size;
3836   if (partially) {
3837     target_size = _cm->partial_mark_stack_size_target();
3838   } else {
3839     target_size = 0;
3840   }
3841 
3842   if (_cm->mark_stack_size() > target_size) {
3843     if (_cm->verbose_low()) {
3844       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3845                              _worker_id, target_size);
3846     }
3847 
3848     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3849       get_entries_from_global_stack();
3850       drain_local_queue(partially);
3851     }
3852 
3853     if (_cm->verbose_low()) {
3854       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3855                              _worker_id, _cm->mark_stack_size());
3856     }
3857   }
3858 }
3859 
3860 // The SATB queue set makes several assumptions about whether to call the
3861 // par or non-par versions of its methods. This is why some of the code is
3862 // replicated. We should really get rid of the single-threaded version
3863 // of the code to simplify things.
3864 void CMTask::drain_satb_buffers() {
3865   if (has_aborted()) return;
3866 
3867   // We set this so that the regular clock knows that we're in the
3868   // middle of draining buffers and doesn't set the abort flag when it
3869   // notices that SATB buffers are available for draining. It'd be
3870   // very counterproductive if it did that. :-)
3871   _draining_satb_buffers = true;
3872 
3873   CMObjectClosure oc(this);
3874   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3875   if (G1CollectedHeap::use_parallel_gc_threads()) {
3876     satb_mq_set.set_par_closure(_worker_id, &oc);
3877   } else {
3878     satb_mq_set.set_closure(&oc);
3879   }
3880 
3881   // This keeps claiming and applying the closure to completed buffers
3882   // until we run out of buffers or we need to abort.
3883   if (G1CollectedHeap::use_parallel_gc_threads()) {
3884     while (!has_aborted() &&
3885            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3886       if (_cm->verbose_medium()) {
3887         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3888       }
3889       statsOnly( ++_satb_buffers_processed );
3890       regular_clock_call();
3891     }
3892   } else {
3893     while (!has_aborted() &&
3894            satb_mq_set.apply_closure_to_completed_buffer()) {
3895       if (_cm->verbose_medium()) {
3896         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3897       }
3898       statsOnly( ++_satb_buffers_processed );
3899       regular_clock_call();
3900     }
3901   }
3902 
3903   if (!concurrent() && !has_aborted()) {
3904     // We should only do this during remark.
3905     if (G1CollectedHeap::use_parallel_gc_threads()) {
3906       satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3907     } else {
3908       satb_mq_set.iterate_closure_all_threads();
3909     }
3910   }
3911 
3912   _draining_satb_buffers = false;
3913 
3914   assert(has_aborted() ||
3915          concurrent() ||
3916          satb_mq_set.completed_buffers_num() == 0, "invariant");
3917 
3918   if (G1CollectedHeap::use_parallel_gc_threads()) {
3919     satb_mq_set.set_par_closure(_worker_id, NULL);
3920   } else {
3921     satb_mq_set.set_closure(NULL);
3922   }
3923 
3924   // again, this was a potentially expensive operation, decrease the
3925   // limits to get the regular clock call early
3926   decrease_limits();
3927 }
3928 
3929 void CMTask::print_stats() {
3930   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3931                          _worker_id, _calls);
3932   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3933                          _elapsed_time_ms, _termination_time_ms);
3934   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3935                          _step_times_ms.num(), _step_times_ms.avg(),
3936                          _step_times_ms.sd());
3937   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3938                          _step_times_ms.maximum(), _step_times_ms.sum());
3939 
3940 #if _MARKING_STATS_
3941   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3942                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3943                          _all_clock_intervals_ms.sd());
3944   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3945                          _all_clock_intervals_ms.maximum(),
3946                          _all_clock_intervals_ms.sum());
3947   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3948                          _clock_due_to_scanning, _clock_due_to_marking);
3949   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3950                          _objs_scanned, _objs_found_on_bitmap);
3951   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3952                          _local_pushes, _local_pops, _local_max_size);
3953   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3954                          _global_pushes, _global_pops, _global_max_size);
3955   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3956                          _global_transfers_to,_global_transfers_from);
3957   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
3958   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3959   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3960                          _steal_attempts, _steals);
3961   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3962   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3963                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3964   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3965                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3966 #endif // _MARKING_STATS_
3967 }
3968 
3969 /*****************************************************************************
3970 
3971     The do_marking_step(time_target_ms, ...) method is the building
3972     block of the parallel marking framework. It can be called in parallel
3973     with other invocations of do_marking_step() on different tasks
3974     (but only one per task, obviously) and concurrently with the
3975     mutator threads, or during remark, hence it eliminates the need
3976     for two versions of the code. When called during remark, it will
3977     pick up from where the task left off during the concurrent marking
3978     phase. Interestingly, tasks are also claimable during evacuation
3979     pauses, since do_marking_step() ensures that it aborts before
3980     it needs to yield.
3981 
3982     The data structures that it uses to do marking work are the
3983     following:
3984 
3985       (1) Marking Bitmap. If there are gray objects that appear only
3986       on the bitmap (this happens either when dealing with an overflow
3987       or when the initial marking phase has simply marked the roots
3988       and didn't push them on the stack), then tasks claim heap
3989       regions whose bitmap they then scan to find gray objects. A
3990       global finger indicates where the end of the last claimed region
3991       is. A local finger indicates how far into the region a task has
3992       scanned. The two fingers are used to determine how to gray an
3993       object (i.e. whether simply marking it is OK, as it will be
3994       visited by a task in the future, or whether it also needs to be
3995       pushed on a stack).
3996 
3997       (2) Local Queue. The local queue of the task which is accessed
3998       reasonably efficiently by the task. Other tasks can steal from
3999       it when they run out of work. Throughout the marking phase, a
4000       task attempts to keep its local queue short but not totally
4001       empty, so that entries are available for stealing by other
4002       tasks. Only when there is no more work will a task totally
4003       drain its local queue.
4004 
4005       (3) Global Mark Stack. This handles local queue overflow. During
4006       marking, only sets of entries are moved between it and the local
4007       queues, as access to it requires a mutex and more fine-grained
4008       interaction with it might cause contention. If it
4009       overflows, then the marking phase should restart and iterate
4010       over the bitmap to identify gray objects. Throughout the marking
4011       phase, tasks attempt to keep the global mark stack at a small
4012       length but not totally empty, so that entries are available for
4013       popping by other tasks. Only when there is no more work will tasks
4014       totally drain the global mark stack.
4015 
4016       (4) SATB Buffer Queue. This is where completed SATB buffers are
4017       made available. Buffers are regularly removed from this queue
4018       and scanned for roots, so that the queue doesn't get too
4019       long. During remark, all completed buffers are processed, as
4020       well as the filled in parts of any uncompleted buffers.
4021 
4022     The do_marking_step() method tries to abort when the time target
4023     has been reached. There are a few other cases when the
4024     do_marking_step() method also aborts:
4025 
4026       (1) When the marking phase has been aborted (after a Full GC).
4027 
4028       (2) When a global overflow (on the global stack) has been
4029       triggered. Before the task aborts, it will actually sync up with
4030       the other tasks to ensure that all the marking data structures
4031       (local queues, stacks, fingers etc.)  are re-initialized so that
4032       when do_marking_step() completes, the marking phase can
4033       immediately restart.
4034 
4035       (3) When enough completed SATB buffers are available. The
4036       do_marking_step() method only tries to drain SATB buffers right
4037       at the beginning. So, if enough buffers are available, the
4038       marking step aborts and the SATB buffers are processed at
4039       the beginning of the next invocation.
4040 
4041       (4) To yield. When we have to yield then we abort and yield
4042       right at the end of do_marking_step(). This saves us a lot
4043       of hassle as, by yielding, we might allow a Full GC. If this
4044       happens then objects will be compacted underneath our feet, the
4045       heap might shrink, etc. We avoid checking for this by just
4046       aborting and doing the yield right at the end.
4047 
4048     From the above it follows that the do_marking_step() method should
4049     be called in a loop (or, otherwise, regularly) until it completes.
4050 
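         As a rough illustration (simplified from the actual callers, with
         task, cm and target_ms used as placeholder names), a marking
         worker essentially does:

           do {
             task->do_marking_step(target_ms,
                                   true,    // do_termination
                                   false);  // is_serial
             // yield to any pending safepoint, handle overflow, etc.
           } while (task->has_aborted() && !cm->has_aborted());
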
4051     If a marking step completes without its has_aborted() flag being
4052     true, it means it has completed the current marking phase (and
4053     also all other marking tasks have done so and have all synced up).
4054 
4055     A method called regular_clock_call() is invoked "regularly" (in
4056     sub ms intervals) throughout marking. It is this clock method that
4057     checks all the abort conditions which were mentioned above and
4058     decides when the task should abort. A work-based scheme is used to
4059     trigger this clock method: when the number of object words the
4060     marking phase has scanned or the number of references the marking
4061     phase has visited reach a given limit. Additional invocations of
4062     the clock method have been planted in a few other strategic places
4063     too. The initial reason for the clock method was to avoid calling
4064     vtime too regularly, as it is quite expensive. So, once it was in
4065     place, it was natural to piggy-back all the other conditions on it
4066     too and not constantly check them throughout the code.
4067 
4068     If do_termination is true then do_marking_step will enter its
4069     termination protocol.
4070 
4071     The value of is_serial must be true when do_marking_step is being
4072     called serially (i.e. by the VMThread) and do_marking_step should
4073     skip any synchronization in the termination and overflow code.
4074     Examples include the serial remark code and the serial reference
4075     processing closures.
4076 
4077     The value of is_serial must be false when do_marking_step is
4078     being called by any of the worker threads in a work gang.
4079     Examples include the concurrent marking code (CMMarkingTask),
4080     the MT remark code, and the MT reference processing closures.
4081 
4082  *****************************************************************************/
4083 
4084 void CMTask::do_marking_step(double time_target_ms,
4085                              bool do_termination,
4086                              bool is_serial) {
4087   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4088   assert(concurrent() == _cm->concurrent(), "they should be the same");
4089 
4090   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4091   assert(_task_queues != NULL, "invariant");
4092   assert(_task_queue != NULL, "invariant");
4093   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4094 
4095   assert(!_claimed,
4096          "only one thread should claim this task at any one time");
4097 
4098   // OK, this doesn't safeguard against all possible scenarios, as it is
4099   // possible for two threads to set the _claimed flag at the same
4100   // time. But it is only for debugging purposes anyway and it will
4101   // catch most problems.
4102   _claimed = true;
4103 
4104   _start_time_ms = os::elapsedVTime() * 1000.0;
4105   statsOnly( _interval_start_time_ms = _start_time_ms );
4106 
4107   // If do_stealing is true then do_marking_step will attempt to
4108   // steal work from the other CMTasks. It only makes sense to
4109   // enable stealing when the termination protocol is enabled
4110   // and do_marking_step() is not being called serially.
4111   bool do_stealing = do_termination && !is_serial;
4112 
4113   double diff_prediction_ms =
4114     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4115   _time_target_ms = time_target_ms - diff_prediction_ms;
4116 
4117   // set up the variables that are used in the work-based scheme to
4118   // call the regular clock method
4119   _words_scanned = 0;
4120   _refs_reached  = 0;
4121   recalculate_limits();
4122 
4123   // clear all flags
4124   clear_has_aborted();
4125   _has_timed_out = false;
4126   _draining_satb_buffers = false;
4127 
4128   ++_calls;
4129 
4130   if (_cm->verbose_low()) {
4131     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4132                            "target = %1.2lfms >>>>>>>>>>",
4133                            _worker_id, _calls, _time_target_ms);
4134   }
4135 
4136   // Set up the bitmap and oop closures. Anything that uses them is
4137   // eventually called from this method, so it is OK to allocate them
4138   // locally, on this method's stack frame.
4139   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4140   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4141   set_cm_oop_closure(&cm_oop_closure);
4142 
4143   if (_cm->has_overflown()) {
4144     // This can happen if the mark stack overflows during a GC pause
4145     // and this task, after a yield point, restarts. We have to abort
4146     // as we need to get into the overflow protocol which happens
4147     // right at the end of this task.
4148     set_has_aborted();
4149   }
4150 
4151   // First drain any available SATB buffers. After this, we will not
4152   // look at SATB buffers before the next invocation of this method.
4153   // If enough completed SATB buffers are queued up, the regular clock
4154   // will abort this task so that it restarts.
4155   drain_satb_buffers();
4156   // ...then partially drain the local queue and the global stack
4157   drain_local_queue(true);
4158   drain_global_stack(true);
4159 
4160   do {
4161     if (!has_aborted() && _curr_region != NULL) {
4162       // This means that we're already holding on to a region.
4163       assert(_finger != NULL, "if region is not NULL, then the finger "
4164              "should not be NULL either");
4165 
4166       // We might have restarted this task after an evacuation pause
4167       // which might have evacuated the region we're holding on to
4168       // underneath our feet. Let's read its limit again to make sure
4169       // that we do not iterate over a region of the heap that
4170       // contains garbage (update_region_limit() will also move
4171       // _finger to the start of the region if it is found empty).
4172       update_region_limit();
4173       // We will start from _finger not from the start of the region,
4174       // as we might be restarting this task after aborting half-way
4175       // through scanning this region. In this case, _finger points to
4176       // the address where we last found a marked object. If this is a
4177       // fresh region, _finger points to start().
4178       MemRegion mr = MemRegion(_finger, _region_limit);
4179 
4180       if (_cm->verbose_low()) {
4181         gclog_or_tty->print_cr("[%u] we're scanning part "
4182                                "["PTR_FORMAT", "PTR_FORMAT") "
4183                                "of region "HR_FORMAT,
4184                                _worker_id, p2i(_finger), p2i(_region_limit),
4185                                HR_FORMAT_PARAMS(_curr_region));
4186       }
4187 
4188       assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4189              "humongous regions should go around loop once only");
4190 
4191       // Some special cases:
4192       // If the memory region is empty, we can just give up the region.
4193       // If the current region is humongous then we only need to check
4194       // the bitmap for the bit associated with the start of the object,
4195       // scan the object if it's live, and give up the region.
4196       // Otherwise, let's iterate over the bitmap of the part of the region
4197       // that is left.
4198       // If the iteration is successful, give up the region.
4199       if (mr.is_empty()) {
4200         giveup_current_region();
4201         regular_clock_call();
4202       } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4203         if (_nextMarkBitMap->isMarked(mr.start())) {
4204           // The object is marked - apply the closure
4205           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4206           bitmap_closure.do_bit(offset);
4207         }
4208         // Even if this task aborted while scanning the humongous object
4209         // we can (and should) give up the current region.
4210         giveup_current_region();
4211         regular_clock_call();
4212       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4213         giveup_current_region();
4214         regular_clock_call();
4215       } else {
4216         assert(has_aborted(), "currently the only way to do so");
4217         // The only way to abort the bitmap iteration is to return
4218         // false from the do_bit() method. However, inside the
4219         // do_bit() method we move the _finger to point to the
4220         // object currently being looked at. So, if we bail out, we
4221         // have definitely set _finger to something non-null.
4222         assert(_finger != NULL, "invariant");
4223 
4224         // Region iteration was actually aborted. So now _finger
4225         // points to the address of the object we last scanned. If we
4226         // leave it there, when we restart this task, we will rescan
4227         // the object. It is easy to avoid this. We move the finger by
4228         // enough to point to the next possible object header (the
4229         // bitmap knows by how much we need to move it as it knows its
4230         // granularity).
4231         assert(_finger < _region_limit, "invariant");
4232         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4233         // Check if bitmap iteration was aborted while scanning the last object
4234         if (new_finger >= _region_limit) {
4235           giveup_current_region();
4236         } else {
4237           move_finger_to(new_finger);
4238         }
4239       }
4240     }
4241     // At this point we have either completed iterating over the
4242     // region we were holding on to, or we have aborted.
4243 
4244     // We then partially drain the local queue and the global stack.
4245     // (Do we really need this?)
4246     drain_local_queue(true);
4247     drain_global_stack(true);
4248 
4249     // Read the note on the claim_region() method on why it might
4250     // return NULL with potentially more regions available for
4251     // claiming and why we have to check out_of_regions() to determine
4252     // whether we're done or not.
4253     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4254       // We are going to try to claim a new region. We should have
4255       // given up on the previous one.
4256       // Separated the asserts so that we know which one fires.
4257       assert(_curr_region  == NULL, "invariant");
4258       assert(_finger       == NULL, "invariant");
4259       assert(_region_limit == NULL, "invariant");
4260       if (_cm->verbose_low()) {
4261         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4262       }
4263       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4264       if (claimed_region != NULL) {
4265         // Yes, we managed to claim one
4266         statsOnly( ++_regions_claimed );
4267 
4268         if (_cm->verbose_low()) {
4269           gclog_or_tty->print_cr("[%u] we successfully claimed "
4270                                  "region "PTR_FORMAT,
4271                                  _worker_id, p2i(claimed_region));
4272         }
4273 
4274         setup_for_region(claimed_region);
4275         assert(_curr_region == claimed_region, "invariant");
4276       }
4277       // It is important to call the regular clock here. It might take
4278       // a while to claim a region if, for example, we hit a large
4279       // block of empty regions. So we need to call the regular clock
4280       // method once round the loop to make sure it's called
4281       // frequently enough.
4282       regular_clock_call();
4283     }
4284 
4285     if (!has_aborted() && _curr_region == NULL) {
4286       assert(_cm->out_of_regions(),
4287              "at this point we should be out of regions");
4288     }
4289   } while ( _curr_region != NULL && !has_aborted());
4290 
4291   if (!has_aborted()) {
4292     // We cannot check whether the global stack is empty, since other
4293     // tasks might be pushing objects to it concurrently.
4294     assert(_cm->out_of_regions(),
4295            "at this point we should be out of regions");
4296 
4297     if (_cm->verbose_low()) {
4298       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4299     }
4300 
4301     // Try to reduce the number of available SATB buffers so that
4302     // remark has less work to do.
4303     drain_satb_buffers();
4304   }
4305 
4306   // Since we've done everything else, we can now totally drain the
4307   // local queue and global stack.
4308   drain_local_queue(false);
4309   drain_global_stack(false);
4310 
4311   // Attempt to steal work from other tasks' queues.
4312   if (do_stealing && !has_aborted()) {
4313     // We have not aborted. This means that we have finished all that
4314     // we could. Let's try to do some stealing...
4315 
4316     // We cannot check whether the global stack is empty, since other
4317     // tasks might be pushing objects to it concurrently.
4318     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4319            "only way to reach here");
4320 
4321     if (_cm->verbose_low()) {
4322       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4323     }
4324 
4325     while (!has_aborted()) {
4326       oop obj;
4327       statsOnly( ++_steal_attempts );
4328 
4329       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4330         if (_cm->verbose_medium()) {
4331           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4332                                  _worker_id, p2i((void*) obj));
4333         }
4334 
4335         statsOnly( ++_steals );
4336 
4337         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4338                "any stolen object should be marked");
4339         scan_object(obj);
4340 
4341         // And since we're towards the end, let's totally drain the
4342         // local queue and global stack.
4343         drain_local_queue(false);
4344         drain_global_stack(false);
4345       } else {
4346         break;
4347       }
4348     }
4349   }
4350 
4351   // If we are about to wrap up and go into termination, check if we
4352   // should raise the overflow flag.
4353   if (do_termination && !has_aborted()) {
4354     if (_cm->force_overflow()->should_force()) {
4355       _cm->set_has_overflown();
4356       regular_clock_call();
4357     }
4358   }
4359 
4360   // We still haven't aborted. Now, let's try to get into the
4361   // termination protocol.
4362   if (do_termination && !has_aborted()) {
4363     // We cannot check whether the global stack is empty, since other
4364     // tasks might be concurrently pushing objects on it.
4365     // Separated the asserts so that we know which one fires.
4366     assert(_cm->out_of_regions(), "only way to reach here");
4367     assert(_task_queue->size() == 0, "only way to reach here");
4368 
4369     if (_cm->verbose_low()) {
4370       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4371     }
4372 
4373     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4374 
4375     // The CMTask class also extends the TerminatorTerminator class,
4376     // hence its should_exit_termination() method will also decide
4377     // whether to exit the termination protocol or not.
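         // In broad terms (a hedged summary; see the terminator code for
         // the details): offer_termination() returns true only once every
         // worker in the gang has offered to terminate, and returns false
         // if it decides this task should go back and look for more work,
         // in which case we abort below and expect to be restarted.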
4378     bool finished = (is_serial ||
4379                      _cm->terminator()->offer_termination(this));
4380     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4381     _termination_time_ms +=
4382       termination_end_time_ms - _termination_start_time_ms;
4383 
4384     if (finished) {
4385       // We're all done.
4386 
4387       if (_worker_id == 0) {
4388         // let's allow task 0 to do this
4389         if (concurrent()) {
4390           assert(_cm->concurrent_marking_in_progress(), "invariant");
4391           // we need to set this to false before the next
4392           // safepoint. This way we ensure that the marking phase
4393           // doesn't observe any more heap expansions.
4394           _cm->clear_concurrent_marking_in_progress();
4395         }
4396       }
4397 
4398       // We can now guarantee that the global stack is empty, since
4399       // all other tasks have finished. We separated the guarantees so
4400       // that, if a condition is false, we can immediately find out
4401       // which one.
4402       guarantee(_cm->out_of_regions(), "only way to reach here");
4403       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4404       guarantee(_task_queue->size() == 0, "only way to reach here");
4405       guarantee(!_cm->has_overflown(), "only way to reach here");
4406       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4407 
4408       if (_cm->verbose_low()) {
4409         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4410       }
4411     } else {
4412       // Apparently there's more work to do. Let's abort this task. The
4413       // caller will restart it and we can hopefully find more to do.
4414 
4415       if (_cm->verbose_low()) {
4416         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4417                                _worker_id);
4418       }
4419 
4420       set_has_aborted();
4421       statsOnly( ++_aborted_termination );
4422     }
4423   }
4424 
4425   // Mainly for debugging purposes, to make sure that a pointer to the
4426   // closure which was stack-allocated in this frame doesn't escape it
4427   // by accident.
4428   set_cm_oop_closure(NULL);
4429   double end_time_ms = os::elapsedVTime() * 1000.0;
4430   double elapsed_time_ms = end_time_ms - _start_time_ms;
4431   // Update the step history.
4432   _step_times_ms.add(elapsed_time_ms);
4433 
4434   if (has_aborted()) {
4435     // The task was aborted for some reason.
4436 
4437     statsOnly( ++_aborted );
4438 
4439     if (_has_timed_out) {
4440       double diff_ms = elapsed_time_ms - _time_target_ms;
4441       // Keep statistics of how well we did with respect to hitting
4442       // our target only if we actually timed out (if we aborted for
4443       // other reasons, then the results might get skewed).
4444       _marking_step_diffs_ms.add(diff_ms);
4445     }
4446 
4447     if (_cm->has_overflown()) {
4448       // This is the interesting one. We aborted because a global
4449       // overflow was raised. This means we have to restart the
4450       // marking phase and start iterating over regions. However, in
4451       // order to do this we have to make sure that all tasks stop
4452       // what they are doing and re-initialise in a safe manner. We
4453       // will achieve this with the use of two barrier sync points.
4454 
4455       if (_cm->verbose_low()) {
4456         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4457       }
4458 
4459       if (!is_serial) {
4460         // We only need to enter the sync barrier if being called
4461         // from a parallel context
4462         _cm->enter_first_sync_barrier(_worker_id);
4463 
4464         // When we exit this sync barrier we know that all tasks have
4465         // stopped doing marking work. So, it's now safe to
4466         // re-initialise our data structures. At the end of this method,
4467         // task 0 will clear the global data structures.
4468       }
4469 
4470       statsOnly( ++_aborted_overflow );
4471 
4472       // We clear the local state of this task...
4473       clear_region_fields();
4474 
4475       if (!is_serial) {
4476         // ...and enter the second barrier.
4477         _cm->enter_second_sync_barrier(_worker_id);
4478       }
4479       // At this point, if we're in the concurrent phase of
4480       // marking, everything has been re-initialized and we're
4481       // ready to restart.
4482     }
4483 
4484     if (_cm->verbose_low()) {
4485       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4486                              "elapsed = %1.2lfms <<<<<<<<<<",
4487                              _worker_id, _time_target_ms, elapsed_time_ms);
4488       if (_cm->has_aborted()) {
4489         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4490                                _worker_id);
4491       }
4492     }
4493   } else {
4494     if (_cm->verbose_low()) {
4495       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4496                              "elapsed = %1.2lfms <<<<<<<<<<",
4497                              _worker_id, _time_target_ms, elapsed_time_ms);
4498     }
4499   }
4500 
4501   _claimed = false;
4502 }
4503 
4504 CMTask::CMTask(uint worker_id,
4505                ConcurrentMark* cm,
4506                size_t* marked_bytes,
4507                BitMap* card_bm,
4508                CMTaskQueue* task_queue,
4509                CMTaskQueueSet* task_queues)
4510   : _g1h(G1CollectedHeap::heap()),
4511     _worker_id(worker_id), _cm(cm),
4512     _claimed(false),
4513     _nextMarkBitMap(NULL), _hash_seed(17),
4514     _task_queue(task_queue),
4515     _task_queues(task_queues),
4516     _cm_oop_closure(NULL),
4517     _marked_bytes_array(marked_bytes),
4518     _card_bm(card_bm) {
4519   guarantee(task_queue != NULL, "invariant");
4520   guarantee(task_queues != NULL, "invariant");
4521 
4522   statsOnly( _clock_due_to_scanning = 0;
4523              _clock_due_to_marking  = 0 );
4524 
4525   _marking_step_diffs_ms.add(0.5);
4526 }
4527 
4528 // These are formatting macros that are used below to ensure
4529 // consistent formatting. The *_H_* versions are used to format the
4530 // header for a particular value and they should be kept consistent
4531 // with the corresponding macro. Also note that most of the macros add
4532 // the necessary white space (as a prefix) which makes them a bit
4533 // easier to compose.
4534 
4535 // All the output lines are prefixed with this string to be able to
4536 // identify them easily in a large log file.
4537 #define G1PPRL_LINE_PREFIX            "###"
4538 
4539 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4540 #ifdef _LP64
4541 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4542 #else // _LP64
4543 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4544 #endif // _LP64
4545 
4546 // For per-region info
4547 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4548 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4549 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4550 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4551 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4552 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4553 
4554 // For summary info
4555 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4556 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4557 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4558 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
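
     // As an example of how these macros compose (plain string literal
     // concatenation at compile time):
     //
     //   G1PPRL_SUM_MB_PERC_FORMAT("used")
     //     == G1PPRL_SUM_MB_FORMAT("used") " / %1.2f %%"
     //     == "  " "used" ": %1.2f MB" " / %1.2f %%"
     //     == "  used: %1.2f MB / %1.2f %%"
     //
     // so a print_cr() call using it must supply two double arguments:
     // the value in MB and the corresponding percentage.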
4559 
4560 G1PrintRegionLivenessInfoClosure::
4561 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4562   : _out(out),
4563     _total_used_bytes(0), _total_capacity_bytes(0),
4564     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4565     _hum_used_bytes(0), _hum_capacity_bytes(0),
4566     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4567     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4568   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4569   MemRegion g1_committed = g1h->g1_committed();
4570   MemRegion g1_reserved = g1h->g1_reserved();
4571   double now = os::elapsedTime();
4572 
4573   // Print the header of the output.
4574   _out->cr();
4575   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4576   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4577                  G1PPRL_SUM_ADDR_FORMAT("committed")
4578                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4579                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4580                  p2i(g1_committed.start()), p2i(g1_committed.end()),
4581                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4582                  HeapRegion::GrainBytes);
4583   _out->print_cr(G1PPRL_LINE_PREFIX);
4584   _out->print_cr(G1PPRL_LINE_PREFIX
4585                 G1PPRL_TYPE_H_FORMAT
4586                 G1PPRL_ADDR_BASE_H_FORMAT
4587                 G1PPRL_BYTE_H_FORMAT
4588                 G1PPRL_BYTE_H_FORMAT
4589                 G1PPRL_BYTE_H_FORMAT
4590                 G1PPRL_DOUBLE_H_FORMAT
4591                 G1PPRL_BYTE_H_FORMAT
4592                 G1PPRL_BYTE_H_FORMAT,
4593                 "type", "address-range",
4594                 "used", "prev-live", "next-live", "gc-eff",
4595                 "remset", "code-roots");
4596   _out->print_cr(G1PPRL_LINE_PREFIX
4597                 G1PPRL_TYPE_H_FORMAT
4598                 G1PPRL_ADDR_BASE_H_FORMAT
4599                 G1PPRL_BYTE_H_FORMAT
4600                 G1PPRL_BYTE_H_FORMAT
4601                 G1PPRL_BYTE_H_FORMAT
4602                 G1PPRL_DOUBLE_H_FORMAT
4603                 G1PPRL_BYTE_H_FORMAT
4604                 G1PPRL_BYTE_H_FORMAT,
4605                 "", "",
4606                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4607                 "(bytes)", "(bytes)");
4608 }
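
     // With the formats above, the printed header looks roughly like the
     // following (illustrative values, column widths not to scale):
     //
     //   ### PHASE <phase-name> @ 123.456
     //   ### HEAP  committed: <addr>-<addr>  reserved: <addr>-<addr>  region-size: 1048576
     //   ###
     //   ###  type         address-range      used prev-live next-live     gc-eff    remset code-roots
     //   ###                               (bytes)   (bytes)   (bytes) (bytes/ms)   (bytes)    (bytes)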
4609 
4610 // It takes as a parameter a pointer to one of the _hum_* fields,
4611 // deduces the corresponding value for a region in a humongous region
4612 // series (either the region size, or whatever is left if the _hum_*
4613 // field is less than the region size), and updates the field accordingly.
4614 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4615   size_t bytes = 0;
4616   // The > 0 check is to deal with the prev and next live bytes which
4617   // could be 0.
4618   if (*hum_bytes > 0) {
4619     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4620     *hum_bytes -= bytes;
4621   }
4622   return bytes;
4623 }
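
     // Worked example (illustrative numbers only): assume 1MB regions and
     // that a _hum_* total for a humongous series has been set to 2.5MB
     // from its "starts humongous" region. Successive calls to
     // get_hum_bytes() then hand out MIN2(GrainBytes, remaining), i.e.
     // 1MB, 1MB and 0.5MB for the three regions in the series, leaving
     // the field at zero once the series has been walked.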
4624 
4625 // It deduces the values for a region in a humongous region series
4626 // from the _hum_* fields and updates those accordingly. It assumes
4627 // that the _hum_* fields have already been set up from the "starts
4628 // humongous" region and that we visit the regions in address order.
4629 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4630                                                      size_t* capacity_bytes,
4631                                                      size_t* prev_live_bytes,
4632                                                      size_t* next_live_bytes) {
4633   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4634   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4635   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4636   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4637   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4638 }
4639 
4640 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4641   const char* type = "";
4642   HeapWord* bottom       = r->bottom();
4643   HeapWord* end          = r->end();
4644   size_t capacity_bytes  = r->capacity();
4645   size_t used_bytes      = r->used();
4646   size_t prev_live_bytes = r->live_bytes();
4647   size_t next_live_bytes = r->next_live_bytes();
4648   double gc_eff          = r->gc_efficiency();
4649   size_t remset_bytes    = r->rem_set()->mem_size();
4650   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4651 
4652   if (r->used() == 0) {
4653     type = "FREE";
4654   } else if (r->is_survivor()) {
4655     type = "SURV";
4656   } else if (r->is_young()) {
4657     type = "EDEN";
4658   } else if (r->startsHumongous()) {
4659     type = "HUMS";
4660 
4661     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4662            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4663            "they should have been zeroed after the last time we used them");
4664     // Set up the _hum_* fields.
4665     _hum_capacity_bytes  = capacity_bytes;
4666     _hum_used_bytes      = used_bytes;
4667     _hum_prev_live_bytes = prev_live_bytes;
4668     _hum_next_live_bytes = next_live_bytes;
4669     get_hum_bytes(&used_bytes, &capacity_bytes,
4670                   &prev_live_bytes, &next_live_bytes);
4671     end = bottom + HeapRegion::GrainWords;
4672   } else if (r->continuesHumongous()) {
4673     type = "HUMC";
4674     get_hum_bytes(&used_bytes, &capacity_bytes,
4675                   &prev_live_bytes, &next_live_bytes);
4676     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4677   } else {
4678     type = "OLD";
4679   }
4680 
4681   _total_used_bytes      += used_bytes;
4682   _total_capacity_bytes  += capacity_bytes;
4683   _total_prev_live_bytes += prev_live_bytes;
4684   _total_next_live_bytes += next_live_bytes;
4685   _total_remset_bytes    += remset_bytes;
4686   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4687 
4688   // Print a line for this particular region.
4689   _out->print_cr(G1PPRL_LINE_PREFIX
4690                  G1PPRL_TYPE_FORMAT
4691                  G1PPRL_ADDR_BASE_FORMAT
4692                  G1PPRL_BYTE_FORMAT
4693                  G1PPRL_BYTE_FORMAT
4694                  G1PPRL_BYTE_FORMAT
4695                  G1PPRL_DOUBLE_FORMAT
4696                  G1PPRL_BYTE_FORMAT
4697                  G1PPRL_BYTE_FORMAT,
4698                  type, p2i(bottom), p2i(end),
4699                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4700                  remset_bytes, strong_code_roots_bytes);
4701 
4702   return false;
4703 }
4704 
4705 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4706   // add static memory usages to remembered set sizes
4707   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4708   // Print the footer of the output.
4709   _out->print_cr(G1PPRL_LINE_PREFIX);
4710   _out->print_cr(G1PPRL_LINE_PREFIX
4711                  " SUMMARY"
4712                  G1PPRL_SUM_MB_FORMAT("capacity")
4713                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4714                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4715                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4716                  G1PPRL_SUM_MB_FORMAT("remset")
4717                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4718                  bytes_to_mb(_total_capacity_bytes),
4719                  bytes_to_mb(_total_used_bytes),
4720                  perc(_total_used_bytes, _total_capacity_bytes),
4721                  bytes_to_mb(_total_prev_live_bytes),
4722                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4723                  bytes_to_mb(_total_next_live_bytes),
4724                  perc(_total_next_live_bytes, _total_capacity_bytes),
4725                  bytes_to_mb(_total_remset_bytes),
4726                  bytes_to_mb(_total_strong_code_roots_bytes));
4727   _out->cr();
4728 }