1 /*
   2  * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1Log.hpp"
  33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  34 #include "gc_implementation/g1/g1RemSet.hpp"
  35 #include "gc_implementation/g1/heapRegion.inline.hpp"
  36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  38 #include "gc_implementation/shared/vmGCOperations.hpp"
  39 #include "memory/genOopClosures.inline.hpp"
  40 #include "memory/referencePolicy.hpp"
  41 #include "memory/resourceArea.hpp"
  42 #include "oops/oop.inline.hpp"
  43 #include "runtime/handles.inline.hpp"
  44 #include "runtime/java.hpp"
  45 #include "services/memTracker.hpp"
  46 
  47 // Concurrent marking bit map wrapper
  48 
  49 CMBitMapRO::CMBitMapRO(int shifter) :
  50   _bm(),
  51   _shifter(shifter) {
  52   _bmStartWord = 0;
  53   _bmWordSize = 0;
  54 }
  55 
  56 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  57                                                HeapWord* limit) const {
  58   // First we must round addr *up* to a possible object boundary.
  59   addr = (HeapWord*)align_size_up((intptr_t)addr,
  60                                   HeapWordSize << _shifter);
  61   size_t addrOffset = heapWordToOffset(addr);
  62   if (limit == NULL) {
  63     limit = _bmStartWord + _bmWordSize;
  64   }
  65   size_t limitOffset = heapWordToOffset(limit);
  66   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  67   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  68   assert(nextAddr >= addr, "get_next_one postcondition");
  69   assert(nextAddr == limit || isMarked(nextAddr),
  70          "get_next_one postcondition");
  71   return nextAddr;
  72 }
  73 
  74 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  75                                                  HeapWord* limit) const {
  76   size_t addrOffset = heapWordToOffset(addr);
  77   if (limit == NULL) {
  78     limit = _bmStartWord + _bmWordSize;
  79   }
  80   size_t limitOffset = heapWordToOffset(limit);
  81   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  82   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  83   assert(nextAddr >= addr, "get_next_zero postcondition");
  84   assert(nextAddr == limit || !isMarked(nextAddr),
  85          "get_next_zero postcondition");
  86   return nextAddr;
  87 }
  88 
  89 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  90   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  91   return (int) (diff >> _shifter);
  92 }
  93 
  94 #ifndef PRODUCT
  95 bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  96   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  97   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  98          "size inconsistency");
  99   return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
 100          _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
 101 }
 102 #endif
 103 
 104 bool CMBitMap::allocate(ReservedSpace heap_rs) {
 105   _bmStartWord = (HeapWord*)(heap_rs.base());
 106   _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
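  // Size the backing store: one bit per (1 << _shifter) heap words; the
  // additional shift by LogBitsPerByte converts bits to bytes, the +1 rounds
  // up, and the result is aligned to the allocation granularity.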
 107   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
 108                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
 109   if (!brs.is_reserved()) {
 110     warning("ConcurrentMark marking bit map allocation failure");
 111     return false;
 112   }
 113   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
 114   // For now we'll just commit all of the bit map up front.
 115   // Later on we'll try to be more parsimonious with swap.
 116   if (!_virtual_space.initialize(brs, brs.size())) {
 117     warning("ConcurrentMark marking bit map backing store failure");
 118     return false;
 119   }
 120   assert(_virtual_space.committed_size() == brs.size(),
 121          "didn't reserve backing store for all of concurrent marking bit map?");
 122   _bm.set_map((uintptr_t*)_virtual_space.low());
 123   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
 124          _bmWordSize, "inconsistency in bit map sizing");
 125   _bm.set_size(_bmWordSize >> _shifter);
 126   return true;
 127 }
 128 
 129 void CMBitMap::clearAll() {
 130   _bm.clear();
 131   return;
 132 }
 133 
 134 void CMBitMap::markRange(MemRegion mr) {
 135   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 136   assert(!mr.is_empty(), "unexpected empty region");
 137   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 138           ((HeapWord *) mr.end())),
 139          "markRange memory region end is not card aligned");
 140   // convert address range into offset range
 141   _bm.at_put_range(heapWordToOffset(mr.start()),
 142                    heapWordToOffset(mr.end()), true);
 143 }
 144 
 145 void CMBitMap::clearRange(MemRegion mr) {
 146   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 147   assert(!mr.is_empty(), "unexpected empty region");
 148   // convert address range into offset range
 149   _bm.at_put_range(heapWordToOffset(mr.start()),
 150                    heapWordToOffset(mr.end()), false);
 151 }
 152 
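// Finds the first marked range in [addr, end_addr), clears those bits in the
// bitmap, and returns the range as a MemRegion (empty if no marked words were
// found below end_addr).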
 153 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 154                                             HeapWord* end_addr) {
 155   HeapWord* start = getNextMarkedWordAddress(addr);
 156   start = MIN2(start, end_addr);
 157   HeapWord* end   = getNextUnmarkedWordAddress(start);
 158   end = MIN2(end, end_addr);
 159   assert(start <= end, "Consistency check");
 160   MemRegion mr(start, end);
 161   if (!mr.is_empty()) {
 162     clearRange(mr);
 163   }
 164   return mr;
 165 }
 166 
 167 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 168   _base(NULL), _cm(cm)
 169 #ifdef ASSERT
 170   , _drain_in_progress(false)
 171   , _drain_in_progress_yields(false)
 172 #endif
 173 {}
 174 
 175 bool CMMarkStack::allocate(size_t capacity) {
 176   // allocate a stack of the requisite depth
 177   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 178   if (!rs.is_reserved()) {
 179     warning("ConcurrentMark MarkStack allocation failure");
 180     return false;
 181   }
 182   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 183   if (!_virtual_space.initialize(rs, rs.size())) {
 184     warning("ConcurrentMark MarkStack backing store failure");
 185     // Release the virtual memory reserved for the marking stack
 186     rs.release();
 187     return false;
 188   }
 189   assert(_virtual_space.committed_size() == rs.size(),
 190          "Didn't reserve backing store for all of ConcurrentMark stack?");
 191   _base = (oop*) _virtual_space.low();
 192   setEmpty();
 193   _capacity = (jint) capacity;
 194   _saved_index = -1;
 195   _should_expand = false;
 196   NOT_PRODUCT(_max_depth = 0);
 197   return true;
 198 }
 199 
 200 void CMMarkStack::expand() {
 201   // Called during remark if we've overflowed the marking stack while marking.
 202   assert(isEmpty(), "stack should have been emptied while handling overflow");
 203   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 204   // Clear expansion flag
 205   _should_expand = false;
 206   if (_capacity == (jint) MarkStackSizeMax) {
 207     if (PrintGCDetails && Verbose) {
 208       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 209     }
 210     return;
 211   }
 212   // Double capacity if possible
 213   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 214   // Do not give up the existing stack until we have managed to
 215   // get the doubled capacity that we want.
 216   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 217                                                            sizeof(oop)));
 218   if (rs.is_reserved()) {
 219     // Release the backing store associated with old stack
 220     _virtual_space.release();
 221     // Reinitialize virtual space for new stack
 222     if (!_virtual_space.initialize(rs, rs.size())) {
 223       fatal("Not enough swap for expanded marking stack capacity");
 224     }
 225     _base = (oop*)(_virtual_space.low());
 226     _index = 0;
 227     _capacity = new_capacity;
 228   } else {
 229     if (PrintGCDetails && Verbose) {
 230       // Failed to double the capacity; carry on with the current stack.
 231       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 232                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 233                           _capacity / K, new_capacity / K);
 234     }
 235   }
 236 }
 237 
 238 void CMMarkStack::set_should_expand() {
 239   // If we're resetting the marking state because of a
 240   // marking stack overflow, record that we should, if
 241   // possible, expand the stack.
 242   _should_expand = _cm->has_overflown();
 243 }
 244 
 245 CMMarkStack::~CMMarkStack() {
 246   if (_base != NULL) {
 247     _base = NULL;
 248     _virtual_space.release();
 249   }
 250 }
 251 
 252 void CMMarkStack::par_push(oop ptr) {
 253   while (true) {
 254     if (isFull()) {
 255       _overflow = true;
 256       return;
 257     }
 258     // Otherwise...
 259     jint index = _index;
 260     jint next_index = index+1;
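    // Try to claim slot 'index' by atomically bumping _index; if another
    // thread raced ahead of us, the cmpxchg fails and we loop to retry.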
 261     jint res = Atomic::cmpxchg(next_index, &_index, index);
 262     if (res == index) {
 263       _base[index] = ptr;
 264       // Note that we don't maintain this atomically.  We could, but it
 265       // doesn't seem necessary.
 266       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 267       return;
 268     }
 269     // Otherwise, we need to try again.
 270   }
 271 }
 272 
 273 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 274   while (true) {
 275     if (isFull()) {
 276       _overflow = true;
 277       return;
 278     }
 279     // Otherwise...
 280     jint index = _index;
 281     jint next_index = index + n;
 282     if (next_index > _capacity) {
 283       _overflow = true;
 284       return;
 285     }
 286     jint res = Atomic::cmpxchg(next_index, &_index, index);
 287     if (res == index) {
 288       for (int i = 0; i < n; i++) {
 289         int  ind = index + i;
 290         assert(ind < _capacity, "By overflow test above.");
 291         _base[ind] = ptr_arr[i];
 292       }
 293       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 294       return;
 295     }
 296     // Otherwise, we need to try again.
 297   }
 298 }
 299 
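// Pushes 'n' entries onto the stack while holding ParGCRareEvent_lock; sets
// the overflow flag (and pushes nothing) if they would not all fit.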
 300 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 301   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 302   jint start = _index;
 303   jint next_index = start + n;
 304   if (next_index > _capacity) {
 305     _overflow = true;
 306     return;
 307   }
 308   // Otherwise.
 309   _index = next_index;
 310   for (int i = 0; i < n; i++) {
 311     int ind = start + i;
 312     assert(ind < _capacity, "By overflow test above.");
 313     _base[ind] = ptr_arr[i];
 314   }
 315   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 316 }
 317 
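// Pops up to 'max' entries from the top of the stack into ptr_arr while
// holding ParGCRareEvent_lock; stores the number popped in *n and returns
// false only if the stack was empty.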
 318 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 319   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 320   jint index = _index;
 321   if (index == 0) {
 322     *n = 0;
 323     return false;
 324   } else {
 325     int k = MIN2(max, index);
 326     jint  new_ind = index - k;
 327     for (int j = 0; j < k; j++) {
 328       ptr_arr[j] = _base[new_ind + j];
 329     }
 330     _index = new_ind;
 331     *n = k;
 332     return true;
 333   }
 334 }
 335 
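// Repeatedly pops (grey) objects off the stack and applies 'cl' to their
// fields. If 'yield_after' is set, a yield check is performed after each
// object; if it requests a yield, draining stops and false is returned.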
 336 template<class OopClosureClass>
 337 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 338   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 339          || SafepointSynchronize::is_at_safepoint(),
 340          "Drain recursion must be yield-safe.");
 341   bool res = true;
 342   debug_only(_drain_in_progress = true);
 343   debug_only(_drain_in_progress_yields = yield_after);
 344   while (!isEmpty()) {
 345     oop newOop = pop();
 346     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 347     assert(newOop->is_oop(), "Expected an oop");
 348     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 349            "only grey objects on this stack");
 350     newOop->oop_iterate(cl);
 351     if (yield_after && _cm->do_yield_check()) {
 352       res = false;
 353       break;
 354     }
 355   }
 356   debug_only(_drain_in_progress = false);
 357   return res;
 358 }
 359 
 360 void CMMarkStack::note_start_of_gc() {
 361   assert(_saved_index == -1,
 362          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 363   _saved_index = _index;
 364 }
 365 
 366 void CMMarkStack::note_end_of_gc() {
 367   // This is intentionally a guarantee, instead of an assert. If we
 368   // accidentally add something to the mark stack during GC, it
 369   // will be a correctness issue so it's better if we crash. We'll
 370   // only check this once per GC anyway, so it won't be a performance
 371   // issue in any way.
 372   guarantee(_saved_index == _index,
 373             err_msg("saved index: %d index: %d", _saved_index, _index));
 374   _saved_index = -1;
 375 }
 376 
 377 void CMMarkStack::oops_do(OopClosure* f) {
 378   assert(_saved_index == _index,
 379          err_msg("saved index: %d index: %d", _saved_index, _index));
 380   for (int i = 0; i < _index; i += 1) {
 381     f->do_oop(&_base[i]);
 382   }
 383 }
 384 
 385 bool ConcurrentMark::not_yet_marked(oop obj) const {
 386   return _g1h->is_obj_ill(obj);
 387 }
 388 
 389 CMRootRegions::CMRootRegions() :
 390   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 391   _should_abort(false),  _next_survivor(NULL) { }
 392 
 393 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 394   _young_list = g1h->young_list();
 395   _cm = cm;
 396 }
 397 
 398 void CMRootRegions::prepare_for_scan() {
 399   assert(!scan_in_progress(), "pre-condition");
 400 
 401   // Currently, only survivors can be root regions.
 402   assert(_next_survivor == NULL, "pre-condition");
 403   _next_survivor = _young_list->first_survivor_region();
 404   _scan_in_progress = (_next_survivor != NULL);
 405   _should_abort = false;
 406 }
 407 
 408 HeapRegion* CMRootRegions::claim_next() {
 409   if (_should_abort) {
 410     // If someone has set the should_abort flag, we return NULL to
 411     // force the caller to bail out of their loop.
 412     return NULL;
 413   }
 414 
 415   // Currently, only survivors can be root regions.
 416   HeapRegion* res = _next_survivor;
 417   if (res != NULL) {
 418     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 419     // Read it again in case it changed while we were waiting for the lock.
 420     res = _next_survivor;
 421     if (res != NULL) {
 422       if (res == _young_list->last_survivor_region()) {
 423         // We just claimed the last survivor so store NULL to indicate
 424         // that we're done.
 425         _next_survivor = NULL;
 426       } else {
 427         _next_survivor = res->get_next_young_region();
 428       }
 429     } else {
 430       // Someone else claimed the last survivor while we were trying
 431       // to take the lock so nothing else to do.
 432     }
 433   }
 434   assert(res == NULL || res->is_survivor(), "post-condition");
 435 
 436   return res;
 437 }
 438 
 439 void CMRootRegions::scan_finished() {
 440   assert(scan_in_progress(), "pre-condition");
 441 
 442   // Currently, only survivors can be root regions.
 443   if (!_should_abort) {
 444     assert(_next_survivor == NULL, "we should have claimed all survivors");
 445   }
 446   _next_survivor = NULL;
 447 
 448   {
 449     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 450     _scan_in_progress = false;
 451     RootRegionScan_lock->notify_all();
 452   }
 453 }
 454 
 455 bool CMRootRegions::wait_until_scan_finished() {
 456   if (!scan_in_progress()) return false;
 457 
 458   {
 459     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 460     while (scan_in_progress()) {
 461       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 462     }
 463   }
 464   return true;
 465 }
 466 
 467 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 468 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 469 #endif // _MSC_VER
 470 
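// Use roughly one quarter of the parallel GC threads (rounded), but always
// at least one, when sizing the concurrent marking thread pool.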
 471 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 472   return MAX2((n_par_threads + 2) / 4, 1U);
 473 }
 474 
 475 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
 476   _g1h(g1h),
 477   _markBitMap1(MinObjAlignment - 1),
 478   _markBitMap2(MinObjAlignment - 1),
 479 
 480   _parallel_marking_threads(0),
 481   _max_parallel_marking_threads(0),
 482   _sleep_factor(0.0),
 483   _marking_task_overhead(1.0),
 484   _cleanup_sleep_factor(0.0),
 485   _cleanup_task_overhead(1.0),
 486   _cleanup_list("Cleanup List"),
 487   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
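  // One bit per card covering the reserved heap (the heap byte size is
  // rounded up to a whole number of cards).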
 488   _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
 489             CardTableModRefBS::card_shift,
 490             false /* in_resource_area*/),
 491 
 492   _prevMarkBitMap(&_markBitMap1),
 493   _nextMarkBitMap(&_markBitMap2),
 494 
 495   _markStack(this),
 496   // _finger set in set_non_marking_state
 497 
 498   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
 499   // _active_tasks set in set_non_marking_state
 500   // _tasks set inside the constructor
 501   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 502   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 503 
 504   _has_overflown(false),
 505   _concurrent(false),
 506   _has_aborted(false),
 507   _restart_for_overflow(false),
 508   _concurrent_marking_in_progress(false),
 509 
 510   // _verbose_level set below
 511 
 512   _init_times(),
 513   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 514   _cleanup_times(),
 515   _total_counting_time(0.0),
 516   _total_rs_scrub_time(0.0),
 517 
 518   _parallel_workers(NULL),
 519 
 520   _count_card_bitmaps(NULL),
 521   _count_marked_bytes(NULL),
 522   _completed_initialization(false) {
 523   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 524   if (verbose_level < no_verbose) {
 525     verbose_level = no_verbose;
 526   }
 527   if (verbose_level > high_verbose) {
 528     verbose_level = high_verbose;
 529   }
 530   _verbose_level = verbose_level;
 531 
 532   if (verbose_low()) {
 533     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 534                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 535   }
 536 
 537   if (!_markBitMap1.allocate(heap_rs)) {
 538     warning("Failed to allocate first CM bit map");
 539     return;
 540   }
 541   if (!_markBitMap2.allocate(heap_rs)) {
 542     warning("Failed to allocate second CM bit map");
 543     return;
 544   }
 545 
 546   // Create & start a ConcurrentMark thread.
 547   _cmThread = new ConcurrentMarkThread(this);
 548   assert(cmThread() != NULL, "CM Thread should have been created");
 549   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 550 
 551   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 552   assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
 553   assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
 554 
 555   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 556   satb_qs.set_buffer_size(G1SATBBufferSize);
 557 
 558   _root_regions.init(_g1h, this);
 559 
 560   if (ConcGCThreads > ParallelGCThreads) {
 561     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 562             "than ParallelGCThreads (" UINTX_FORMAT ").",
 563             ConcGCThreads, ParallelGCThreads);
 564     return;
 565   }
 566   if (ParallelGCThreads == 0) {
 567     // if we are not running with any parallel GC threads we will not
 568     // spawn any marking threads either
 569     _parallel_marking_threads =       0;
 570     _max_parallel_marking_threads =   0;
 571     _sleep_factor             =     0.0;
 572     _marking_task_overhead    =     1.0;
 573   } else {
 574     if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 575       // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 576       // if both are set
 577       _sleep_factor             = 0.0;
 578       _marking_task_overhead    = 1.0;
 579     } else if (G1MarkingOverheadPercent > 0) {
 580       // We will calculate the number of parallel marking threads based
 581       // on a target overhead with respect to the soft real-time goal
 582       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 583       double overall_cm_overhead =
 584         (double) MaxGCPauseMillis * marking_overhead /
 585         (double) GCPauseIntervalMillis;
 586       double cpu_ratio = 1.0 / (double) os::processor_count();
 587       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 588       double marking_task_overhead =
 589         overall_cm_overhead / marking_thread_num *
 590                                                 (double) os::processor_count();
 591       double sleep_factor =
 592                          (1.0 - marking_task_overhead) / marking_task_overhead;
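      // Illustrative numbers (not from the source): with MaxGCPauseMillis=200,
      // GCPauseIntervalMillis=1000, G1MarkingOverheadPercent=10 and 8 CPUs:
      // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02, cpu_ratio = 0.125,
      // so marking_thread_num = ceil(0.02 / 0.125) = 1, marking_task_overhead
      // = 0.02 * 8 = 0.16 and sleep_factor = (1 - 0.16) / 0.16 = 5.25.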
 593 
 594       FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 595       _sleep_factor             = sleep_factor;
 596       _marking_task_overhead    = marking_task_overhead;
 597     } else {
 598       // Calculate the number of parallel marking threads by scaling
 599       // the number of parallel GC threads.
 600       uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 601       FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 602       _sleep_factor             = 0.0;
 603       _marking_task_overhead    = 1.0;
 604     }
 605 
 606     assert(ConcGCThreads > 0, "Should have been set");
 607     _parallel_marking_threads = (uint) ConcGCThreads;
 608     _max_parallel_marking_threads = _parallel_marking_threads;
 609 
 610     if (parallel_marking_threads() > 1) {
 611       _cleanup_task_overhead = 1.0;
 612     } else {
 613       _cleanup_task_overhead = marking_task_overhead();
 614     }
 615     _cleanup_sleep_factor =
 616                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 617 
 618 #if 0
 619     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 620     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 621     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 622     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 623     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 624 #endif
 625 
 626     guarantee(parallel_marking_threads() > 0, "peace of mind");
 627     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 628          _max_parallel_marking_threads, false, true);
 629     if (_parallel_workers == NULL) {
 630       vm_exit_during_initialization("Failed necessary allocation.");
 631     } else {
 632       _parallel_workers->initialize_workers();
 633     }
 634   }
 635 
 636   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 637     uintx mark_stack_size =
 638       MIN2(MarkStackSizeMax,
 639           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 640     // Verify that the calculated value for MarkStackSize is in range.
 641     // It would be nice to use the private utility routine from Arguments.
 642     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 643       warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
 644               "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 645               mark_stack_size, (uintx) 1, MarkStackSizeMax);
 646       return;
 647     }
 648     FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
 649   } else {
 650     // Verify MarkStackSize is in range.
 651     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 652       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 653         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 654           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
 655                   "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 656                   MarkStackSize, (uintx) 1, MarkStackSizeMax);
 657           return;
 658         }
 659       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 660         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 661           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
 662                   " or for MarkStackSizeMax (" UINTX_FORMAT ")",
 663                   MarkStackSize, MarkStackSizeMax);
 664           return;
 665         }
 666       }
 667     }
 668   }
 669 
 670   if (!_markStack.allocate(MarkStackSize)) {
 671     warning("Failed to allocate CM marking stack");
 672     return;
 673   }
 674 
 675   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 676   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 677 
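  // Per-worker liveness counting structures: each worker gets its own card
  // bitmap and an array of marked-byte counts indexed by region.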
 678   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 679   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 680 
 681   BitMap::idx_t card_bm_size = _card_bm.size();
 682 
 683   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 684   _active_tasks = _max_worker_id;
 685 
 686   size_t max_regions = (size_t) _g1h->max_regions();
 687   for (uint i = 0; i < _max_worker_id; ++i) {
 688     CMTaskQueue* task_queue = new CMTaskQueue();
 689     task_queue->initialize();
 690     _task_queues->register_queue(i, task_queue);
 691 
 692     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 693     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 694 
 695     _tasks[i] = new CMTask(i, this,
 696                            _count_marked_bytes[i],
 697                            &_count_card_bitmaps[i],
 698                            task_queue, _task_queues);
 699 
 700     _accum_task_vtime[i] = 0.0;
 701   }
 702 
 703   // Calculate the card number for the bottom of the heap. Used
 704   // in biasing indexes into the accounting card bitmaps.
 705   _heap_bottom_card_num =
 706     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 707                                 CardTableModRefBS::card_shift);
 708 
 709   // Clear all the liveness counting data
 710   clear_all_count_data();
 711 
 712   // so that the call below can read a sensible value
 713   _heap_start = (HeapWord*) heap_rs.base();
 714   set_non_marking_state();
 715   _completed_initialization = true;
 716 }
 717 
 718 void ConcurrentMark::update_g1_committed(bool force) {
 719   // If concurrent marking is not in progress, then we do not need to
 720   // update _heap_end.
 721   if (!concurrent_marking_in_progress() && !force) return;
 722 
 723   MemRegion committed = _g1h->g1_committed();
 724   assert(committed.start() == _heap_start, "start shouldn't change");
 725   HeapWord* new_end = committed.end();
 726   if (new_end > _heap_end) {
 727     // The heap has been expanded.
 728 
 729     _heap_end = new_end;
 730   }
 731   // Notice that the heap can also shrink. However, this only happens
 732   // during a Full GC (at least currently) and the entire marking
 733   // phase will bail out and the task will not be restarted. So, let's
 734   // do nothing.
 735 }
 736 
 737 void ConcurrentMark::reset() {
 738   // Starting values for these two. This should be called in a STW
 739   // phase. CM will be notified of any future g1_committed expansions;
 740   // those notifications will come at the end of evacuation pauses,
 741   // when tasks are inactive.
 742   MemRegion committed = _g1h->g1_committed();
 743   _heap_start = committed.start();
 744   _heap_end   = committed.end();
 745 
 746   // Separated the asserts so that we know which one fires.
 747   assert(_heap_start != NULL, "heap bounds should look ok");
 748   assert(_heap_end != NULL, "heap bounds should look ok");
 749   assert(_heap_start < _heap_end, "heap bounds should look ok");
 750 
 751   // Reset all the marking data structures and any necessary flags
 752   reset_marking_state();
 753 
 754   if (verbose_low()) {
 755     gclog_or_tty->print_cr("[global] resetting");
 756   }
 757 
 758   // We do reset all of them, since different phases will use
 759   // different numbers of active threads. So, it's easiest to have all
 760   // of them ready.
 761   for (uint i = 0; i < _max_worker_id; ++i) {
 762     _tasks[i]->reset(_nextMarkBitMap);
 763   }
 764 
 765   // We need this to make sure that the flag is on during the evacuation
 766   // pause that has the initial mark piggy-backed on it.
 767   set_concurrent_marking_in_progress();
 768 }
 769 
 770 
 771 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 772   _markStack.set_should_expand();
 773   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 774   if (clear_overflow) {
 775     clear_has_overflown();
 776   } else {
 777     assert(has_overflown(), "pre-condition");
 778   }
 779   _finger = _heap_start;
 780 
 781   for (uint i = 0; i < _max_worker_id; ++i) {
 782     CMTaskQueue* queue = _task_queues->queue(i);
 783     queue->set_empty();
 784   }
 785 }
 786 
 787 void ConcurrentMark::set_concurrency(uint active_tasks) {
 788   assert(active_tasks <= _max_worker_id, "we should not have more");
 789 
 790   _active_tasks = active_tasks;
 791   // Need to update the three data structures below according to the
 792   // number of active threads for this phase.
 793   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 794   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 795   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 796 }
 797 
 798 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 799   set_concurrency(active_tasks);
 800 
 801   _concurrent = concurrent;
 802   // We propagate this to all tasks, not just the active ones.
 803   for (uint i = 0; i < _max_worker_id; ++i)
 804     _tasks[i]->set_concurrent(concurrent);
 805 
 806   if (concurrent) {
 807     set_concurrent_marking_in_progress();
 808   } else {
 809     // We currently assume that the concurrent flag has been set to
 810     // false before we start remark. At this point we should also be
 811     // in a STW phase.
 812     assert(!concurrent_marking_in_progress(), "invariant");
 813     assert(_finger == _heap_end,
 814            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
 815                    _finger, _heap_end));
 816     update_g1_committed(true);
 817   }
 818 }
 819 
 820 void ConcurrentMark::set_non_marking_state() {
 821   // We set the global marking state to some default values when we're
 822   // not doing marking.
 823   reset_marking_state();
 824   _active_tasks = 0;
 825   clear_concurrent_marking_in_progress();
 826 }
 827 
 828 ConcurrentMark::~ConcurrentMark() {
 829   // The ConcurrentMark instance is never freed.
 830   ShouldNotReachHere();
 831 }
 832 
 833 void ConcurrentMark::clearNextBitmap() {
 834   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 835   G1CollectorPolicy* g1p = g1h->g1_policy();
 836 
 837   // Make sure that the concurrent mark thread still appears to be
 838   // within the current cycle.
 839   guarantee(cmThread()->during_cycle(), "invariant");
 840 
 841   // We are finishing up the current cycle by clearing the next
 842   // marking bitmap and getting it ready for the next cycle. During
 843   // this time no other cycle can start. So, let's make sure that this
 844   // is the case.
 845   guarantee(!g1h->mark_in_progress(), "invariant");
 846 
 847   // clear the mark bitmap (no grey objects to start with).
 848   // We need to do this in chunks and offer to yield in between
 849   // each chunk.
 850   HeapWord* start  = _nextMarkBitMap->startWord();
 851   HeapWord* end    = _nextMarkBitMap->endWord();
 852   HeapWord* cur    = start;
 853   size_t chunkSize = M;
 854   while (cur < end) {
 855     HeapWord* next = cur + chunkSize;
 856     if (next > end) {
 857       next = end;
 858     }
 859     MemRegion mr(cur,next);
 860     _nextMarkBitMap->clearRange(mr);
 861     cur = next;
 862     do_yield_check();
 863 
 864     // Repeat the asserts from above. We'll do them as asserts here to
 865     // minimize their overhead on the product. However, we'll have
 866     // them as guarantees at the beginning / end of the bitmap
 867     // clearing to get some checking in the product.
 868     assert(cmThread()->during_cycle(), "invariant");
 869     assert(!g1h->mark_in_progress(), "invariant");
 870   }
 871 
 872   // Clear the liveness counting data
 873   clear_all_count_data();
 874 
 875   // Repeat the asserts from above.
 876   guarantee(cmThread()->during_cycle(), "invariant");
 877   guarantee(!g1h->mark_in_progress(), "invariant");
 878 }
 879 
 880 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 881 public:
 882   bool doHeapRegion(HeapRegion* r) {
 883     if (!r->continuesHumongous()) {
 884       r->note_start_of_marking();
 885     }
 886     return false;
 887   }
 888 };
 889 
 890 void ConcurrentMark::checkpointRootsInitialPre() {
 891   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 892   G1CollectorPolicy* g1p = g1h->g1_policy();
 893 
 894   _has_aborted = false;
 895 
 896 #ifndef PRODUCT
 897   if (G1PrintReachableAtInitialMark) {
 898     print_reachable("at-cycle-start",
 899                     VerifyOption_G1UsePrevMarking, true /* all */);
 900   }
 901 #endif
 902 
 903   // Initialise marking structures. This has to be done in a STW phase.
 904   reset();
 905 
 906   // For each region note start of marking.
 907   NoteStartOfMarkHRClosure startcl;
 908   g1h->heap_region_iterate(&startcl);
 909 }
 910 
 911 
 912 void ConcurrentMark::checkpointRootsInitialPost() {
 913   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 914 
 915   // If we force an overflow during remark, the remark operation will
 916   // actually abort and we'll restart concurrent marking. If we always
 917   // force an overflow during remark we'll never actually complete the
 918   // marking phase. So, we initialize this here, at the start of the
 919   // cycle, so that the remaining overflow number will decrease at
 920   // every remark and we'll eventually not need to cause one.
 921   force_overflow_stw()->init();
 922 
 923   // Start Concurrent Marking weak-reference discovery.
 924   ReferenceProcessor* rp = g1h->ref_processor_cm();
 925   // enable ("weak") refs discovery
 926   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 927   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 928 
 929   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 930   // This is the start of the marking cycle; we expect all
 931   // threads to have SATB queues with active set to false.
 932   satb_mq_set.set_active_all_threads(true, /* new active value */
 933                                      false /* expected_active */);
 934 
 935   _root_regions.prepare_for_scan();
 936 
 937   // update_g1_committed() will be called at the end of an evac pause
 938   // when marking is on. So, it's also called at the end of the
 939   // initial-mark pause to update the heap end, if the heap expands
 940   // during it. No need to call it here.
 941 }
 942 
 943 /*
 944  * Notice that in the next two methods, we actually leave the STS
 945  * during the barrier sync and join it immediately afterwards. If we
 946  * do not do this, the following deadlock can occur: one thread could
 947  * be in the barrier sync code, waiting for the other thread to also
 948  * sync up, whereas another one could be trying to yield, while also
 949  * waiting for the other threads to sync up too.
 950  *
 951  * Note, however, that this code is also used during remark and in
 952  * this case we should not attempt to leave / enter the STS, otherwise
 953  * we'll either hit an assert (debug / fastdebug) or deadlock
 954  * (product). So we should only leave / enter the STS if we are
 955  * operating concurrently.
 956  *
 957  * Because the thread that does the sync barrier has left the STS, it
 958  * is possible for it to be suspended for a Full GC, or for an evacuation
 959  * pause to occur. This is actually safe, since entering the sync
 960  * barrier is one of the last things do_marking_step() does, and it
 961  * doesn't manipulate any data structures afterwards.
 962  */
 963 
 964 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 965   if (verbose_low()) {
 966     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 967   }
 968 
 969   if (concurrent()) {
 970     ConcurrentGCThread::stsLeave();
 971   }
 972   _first_overflow_barrier_sync.enter();
 973   if (concurrent()) {
 974     ConcurrentGCThread::stsJoin();
 975   }
 976   // at this point everyone should have synced up and not be doing any
 977   // more work
 978 
 979   if (verbose_low()) {
 980     gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 981   }
 982 
 983   // If we're executing the concurrent phase of marking, reset the marking
 984   // state; otherwise the marking state is reset after reference processing,
 985   // during the remark pause.
 986   // If we reset here as a result of an overflow during the remark we will
 987   // see assertion failures from any subsequent set_concurrency_and_phase()
 988   // calls.
 989   if (concurrent()) {
 990     // let the task associated with worker 0 do this
 991     if (worker_id == 0) {
 992       // task 0 is responsible for clearing the global data structures
 993       // We should be here because of an overflow. During STW we should
 994       // not clear the overflow flag since we rely on it being true when
 995       // we exit this method to abort the pause and restart concurrent
 996       // marking.
 997       reset_marking_state(true /* clear_overflow */);
 998       force_overflow()->update();
 999 
1000       if (G1Log::fine()) {
1001         gclog_or_tty->date_stamp(PrintGCDateStamps);
1002         gclog_or_tty->stamp(PrintGCTimeStamps);
1003         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1004       }
1005     }
1006   }
1007 
1008   // after this, each task should reset its own data structures and
1009   // then go into the second barrier
1010 }
1011 
1012 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1013   if (verbose_low()) {
1014     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1015   }
1016 
1017   if (concurrent()) {
1018     ConcurrentGCThread::stsLeave();
1019   }
1020   _second_overflow_barrier_sync.enter();
1021   if (concurrent()) {
1022     ConcurrentGCThread::stsJoin();
1023   }
1024   // at this point everything should be re-initialized and ready to go
1025 
1026   if (verbose_low()) {
1027     gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1028   }
1029 }
1030 
1031 #ifndef PRODUCT
1032 void ForceOverflowSettings::init() {
1033   _num_remaining = G1ConcMarkForceOverflow;
1034   _force = false;
1035   update();
1036 }
1037 
1038 void ForceOverflowSettings::update() {
1039   if (_num_remaining > 0) {
1040     _num_remaining -= 1;
1041     _force = true;
1042   } else {
1043     _force = false;
1044   }
1045 }
1046 
1047 bool ForceOverflowSettings::should_force() {
1048   if (_force) {
1049     _force = false;
1050     return true;
1051   } else {
1052     return false;
1053   }
1054 }
1055 #endif // !PRODUCT
1056 
1057 class CMConcurrentMarkingTask: public AbstractGangTask {
1058 private:
1059   ConcurrentMark*       _cm;
1060   ConcurrentMarkThread* _cmt;
1061 
1062 public:
1063   void work(uint worker_id) {
1064     assert(Thread::current()->is_ConcurrentGC_thread(),
1065            "this should only be done by a conc GC thread");
1066     ResourceMark rm;
1067 
1068     double start_vtime = os::elapsedVTime();
1069 
1070     ConcurrentGCThread::stsJoin();
1071 
1072     assert(worker_id < _cm->active_tasks(), "invariant");
1073     CMTask* the_task = _cm->task(worker_id);
1074     the_task->record_start_time();
1075     if (!_cm->has_aborted()) {
1076       do {
1077         double start_vtime_sec = os::elapsedVTime();
1078         double start_time_sec = os::elapsedTime();
1079         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1080 
1081         the_task->do_marking_step(mark_step_duration_ms,
1082                                   true  /* do_stealing    */,
1083                                   true  /* do_termination */,
1084                                   false /* is_serial*/);
1085 
1086         double end_time_sec = os::elapsedTime();
1087         double end_vtime_sec = os::elapsedVTime();
1088         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1089         double elapsed_time_sec = end_time_sec - start_time_sec;
1090         _cm->clear_has_overflown();
1091 
1092         bool ret = _cm->do_yield_check(worker_id);
1093 
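        // If this task aborted its marking step (but marking as a whole has
        // not been aborted), throttle it by sleeping in proportion to the CPU
        // time it just consumed, scaled by the configured sleep factor.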
1094         jlong sleep_time_ms;
1095         if (!_cm->has_aborted() && the_task->has_aborted()) {
1096           sleep_time_ms =
1097             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1098           ConcurrentGCThread::stsLeave();
1099           os::sleep(Thread::current(), sleep_time_ms, false);
1100           ConcurrentGCThread::stsJoin();
1101         }
1102         double end_time2_sec = os::elapsedTime();
1103         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1104 
1105 #if 0
1106           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1107                                  "overhead %1.4lf",
1108                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1109                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1110           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1111                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1112 #endif
1113       } while (!_cm->has_aborted() && the_task->has_aborted());
1114     }
1115     the_task->record_end_time();
1116     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1117 
1118     ConcurrentGCThread::stsLeave();
1119 
1120     double end_vtime = os::elapsedVTime();
1121     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1122   }
1123 
1124   CMConcurrentMarkingTask(ConcurrentMark* cm,
1125                           ConcurrentMarkThread* cmt) :
1126       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1127 
1128   ~CMConcurrentMarkingTask() { }
1129 };
1130 
1131 // Calculates the number of active workers for a concurrent
1132 // phase.
1133 uint ConcurrentMark::calc_parallel_marking_threads() {
1134   if (G1CollectedHeap::use_parallel_gc_threads()) {
1135     uint n_conc_workers = 0;
1136     if (!UseDynamicNumberOfGCThreads ||
1137         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1138          !ForceDynamicNumberOfGCThreads)) {
1139       n_conc_workers = max_parallel_marking_threads();
1140     } else {
1141       n_conc_workers =
1142         AdaptiveSizePolicy::calc_default_active_workers(
1143                                      max_parallel_marking_threads(),
1144                                      1, /* Minimum workers */
1145                                      parallel_marking_threads(),
1146                                      Threads::number_of_non_daemon_threads());
1147       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1148       // that scaling has already gone into "_max_parallel_marking_threads".
1149     }
1150     assert(n_conc_workers > 0, "Always need at least 1");
1151     return n_conc_workers;
1152   }
1153   // If we are not running with any parallel GC threads we will not
1154   // have spawned any marking threads either. Hence the number of
1155   // concurrent workers should be 0.
1156   return 0;
1157 }
1158 
1159 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1160   // Currently, only survivors can be root regions.
1161   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1162   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1163 
1164   const uintx interval = PrefetchScanIntervalInBytes;
1165   HeapWord* curr = hr->bottom();
1166   const HeapWord* end = hr->top();
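  // Walk the region's objects linearly from bottom to top, applying the
  // closure to each reference field and prefetching ahead to hide latency.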
1167   while (curr < end) {
1168     Prefetch::read(curr, interval);
1169     oop obj = oop(curr);
1170     int size = obj->oop_iterate(&cl);
1171     assert(size == obj->size(), "sanity");
1172     curr += size;
1173   }
1174 }
1175 
1176 class CMRootRegionScanTask : public AbstractGangTask {
1177 private:
1178   ConcurrentMark* _cm;
1179 
1180 public:
1181   CMRootRegionScanTask(ConcurrentMark* cm) :
1182     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1183 
1184   void work(uint worker_id) {
1185     assert(Thread::current()->is_ConcurrentGC_thread(),
1186            "this should only be done by a conc GC thread");
1187 
1188     CMRootRegions* root_regions = _cm->root_regions();
1189     HeapRegion* hr = root_regions->claim_next();
1190     while (hr != NULL) {
1191       _cm->scanRootRegion(hr, worker_id);
1192       hr = root_regions->claim_next();
1193     }
1194   }
1195 };
1196 
1197 void ConcurrentMark::scanRootRegions() {
1198   // scan_in_progress() will have been set to true only if there was
1199   // at least one root region to scan. So, if it's false, we
1200   // should not attempt to do any further work.
1201   if (root_regions()->scan_in_progress()) {
1202     _parallel_marking_threads = calc_parallel_marking_threads();
1203     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1204            "Maximum number of marking threads exceeded");
1205     uint active_workers = MAX2(1U, parallel_marking_threads());
1206 
1207     CMRootRegionScanTask task(this);
1208     if (use_parallel_marking_threads()) {
1209       _parallel_workers->set_active_workers((int) active_workers);
1210       _parallel_workers->run_task(&task);
1211     } else {
1212       task.work(0);
1213     }
1214 
1215     // It's possible that has_aborted() is true here without actually
1216     // aborting the survivor scan earlier. This is OK as it's
1217     // mainly used for sanity checking.
1218     root_regions()->scan_finished();
1219   }
1220 }
1221 
1222 void ConcurrentMark::markFromRoots() {
1223   // we might be tempted to assert that:
1224   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1225   //        "inconsistent argument?");
1226   // However that wouldn't be right, because it's possible that
1227   // a safepoint is indeed in progress as a younger generation
1228   // stop-the-world GC happens even as we mark in this generation.
1229 
1230   _restart_for_overflow = false;
1231   force_overflow_conc()->init();
1232 
1233   // _g1h has _n_par_threads
1234   _parallel_marking_threads = calc_parallel_marking_threads();
1235   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1236     "Maximum number of marking threads exceeded");
1237 
1238   uint active_workers = MAX2(1U, parallel_marking_threads());
1239 
1240   // Parallel task terminator is set in "set_concurrency_and_phase()"
1241   set_concurrency_and_phase(active_workers, true /* concurrent */);
1242 
1243   CMConcurrentMarkingTask markingTask(this, cmThread());
1244   if (use_parallel_marking_threads()) {
1245     _parallel_workers->set_active_workers((int)active_workers);
1246     // Don't set _n_par_threads because it affects MT in process_strong_roots()
1247     // and the decisions on that MT processing are made elsewhere.
1248     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1249     _parallel_workers->run_task(&markingTask);
1250   } else {
1251     markingTask.work(0);
1252   }
1253   print_stats();
1254 }
1255 
1256 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1257   // world is stopped at this checkpoint
1258   assert(SafepointSynchronize::is_at_safepoint(),
1259          "world should be stopped");
1260 
1261   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1262 
1263   // If a full collection has happened, we shouldn't do this.
1264   if (has_aborted()) {
1265     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1266     return;
1267   }
1268 
1269   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1270 
1271   if (VerifyDuringGC) {
1272     HandleMark hm;  // handle scope
1273     gclog_or_tty->print(" VerifyDuringGC:(before)");
1274     Universe::heap()->prepare_for_verify();
1275     Universe::verify(/* silent */ false,
1276                      /* option */ VerifyOption_G1UsePrevMarking);
1277   }
1278 
1279   G1CollectorPolicy* g1p = g1h->g1_policy();
1280   g1p->record_concurrent_mark_remark_start();
1281 
1282   double start = os::elapsedTime();
1283 
1284   checkpointRootsFinalWork();
1285 
1286   double mark_work_end = os::elapsedTime();
1287 
1288   weakRefsWork(clear_all_soft_refs);
1289 
1290   if (has_overflown()) {
1291     // Oops.  We overflowed.  Restart concurrent marking.
1292     _restart_for_overflow = true;
1293     // Clear the marking state because we will be restarting
1294     // marking due to overflowing the global mark stack.
1295     reset_marking_state();
1296     if (G1TraceMarkStackOverflow) {
1297       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1298     }
1299   } else {
1300     // Aggregate the per-task counting data that we have accumulated
1301     // while marking.
1302     aggregate_count_data();
1303 
1304     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1305     // We're done with marking.
1306     // This is the end of the marking cycle; we expect all
1307     // threads to have SATB queues with active set to true.
1308     satb_mq_set.set_active_all_threads(false, /* new active value */
1309                                        true /* expected_active */);
1310 
1311     if (VerifyDuringGC) {
1312       HandleMark hm;  // handle scope
1313       gclog_or_tty->print(" VerifyDuringGC:(after)");
1314       Universe::heap()->prepare_for_verify();
1315       Universe::verify(/* silent */ false,
1316                        /* option */ VerifyOption_G1UseNextMarking);
1317     }
1318     assert(!restart_for_overflow(), "sanity");
1319     // Completely reset the marking state since marking completed
1320     set_non_marking_state();
1321   }
1322 
1323   // Expand the marking stack, if we have to and if we can.
1324   if (_markStack.should_expand()) {
1325     _markStack.expand();
1326   }
1327 
1328 #if VERIFY_OBJS_PROCESSED
1329   _scan_obj_cl.objs_processed = 0;
1330   ThreadLocalObjQueue::objs_enqueued = 0;
1331 #endif
1332 
1333   // Statistics
1334   double now = os::elapsedTime();
1335   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1336   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1337   _remark_times.add((now - start) * 1000.0);
1338 
1339   g1p->record_concurrent_mark_remark_end();
1340 }
1341 
1342 // Base class of the closures that finalize and verify the
1343 // liveness counting data.
1344 class CMCountDataClosureBase: public HeapRegionClosure {
1345 protected:
1346   G1CollectedHeap* _g1h;
1347   ConcurrentMark* _cm;
1348   CardTableModRefBS* _ct_bs;
1349 
1350   BitMap* _region_bm;
1351   BitMap* _card_bm;
1352 
1353   // Takes a region that's not empty (i.e., it has at least one
1354   // live object in it) and sets its corresponding bit on the region
1355   // bitmap to 1. If the region is "starts humongous" it will also set
1356   // to 1 the bits on the region bitmap that correspond to its
1357   // associated "continues humongous" regions.
1358   void set_bit_for_region(HeapRegion* hr) {
1359     assert(!hr->continuesHumongous(), "should have filtered those out");
1360 
1361     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1362     if (!hr->startsHumongous()) {
1363       // Normal (non-humongous) case: just set the bit.
1364       _region_bm->par_at_put(index, true);
1365     } else {
1366       // Starts humongous case: calculate how many regions are part of
1367       // this humongous region and then set the bit range.
1368       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1369       _region_bm->par_at_put_range(index, end_index, true);
1370     }
1371   }
1372 
1373 public:
1374   CMCountDataClosureBase(G1CollectedHeap* g1h,
1375                          BitMap* region_bm, BitMap* card_bm):
1376     _g1h(g1h), _cm(g1h->concurrent_mark()),
1377     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1378     _region_bm(region_bm), _card_bm(card_bm) { }
1379 };
1380 
1381 // Closure that calculates the # live objects per region. Used
1382 // for verification purposes during the cleanup pause.
1383 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1384   CMBitMapRO* _bm;
1385   size_t _region_marked_bytes;
1386 
1387 public:
1388   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1389                          BitMap* region_bm, BitMap* card_bm) :
1390     CMCountDataClosureBase(g1h, region_bm, card_bm),
1391     _bm(bm), _region_marked_bytes(0) { }
1392 
1393   bool doHeapRegion(HeapRegion* hr) {
1394 
1395     if (hr->continuesHumongous()) {
1396       // We will ignore these here and process them when their
1397       // associated "starts humongous" region is processed (see
1398       // set_bit_for_heap_region()). Note that we cannot rely on their
1399       // associated "starts humongous" region to have their bit set to
1400       // 1 since, due to the region chunking in the parallel region
1401       // iteration, a "continues humongous" region might be visited
1402       // before its associated "starts humongous".
1403       return false;
1404     }
1405 
1406     HeapWord* ntams = hr->next_top_at_mark_start();
1407     HeapWord* start = hr->bottom();
1408 
1409     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1410            err_msg("Preconditions not met - "
1411                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1412                    start, ntams, hr->end()));
1413 
1414     // Find the first marked object at or after "start".
1415     start = _bm->getNextMarkedWordAddress(start, ntams);
1416 
1417     size_t marked_bytes = 0;
1418 
1419     while (start < ntams) {
1420       oop obj = oop(start);
1421       int obj_sz = obj->size();
1422       HeapWord* obj_end = start + obj_sz;
1423 
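      // card_bitmap_index_for() maps a heap word address to the index of
      // the card containing it (one bit per 512-byte card with the default
      // card size), so [start_idx, end_idx) will cover the cards spanned
      // by the object once end_idx is adjusted below for a non-card-aligned
      // obj_end.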
1424       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1425       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1426 
1427       // Note: if we're looking at the last region in the heap, obj_end
1428       // could actually be just beyond the end of the heap; end_idx
1429       // will then correspond to a (non-existent) card that is also
1430       // just beyond the heap.
1431       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1432         // end of object is not card aligned - increment to cover
1433         // all the cards spanned by the object
1434         end_idx += 1;
1435       }
1436 
1437       // Set the bits in the card BM for the cards spanned by this object.
1438       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1439 
1440       // Add the size of this object to the number of marked bytes.
1441       marked_bytes += (size_t)obj_sz * HeapWordSize;
1442 
1443       // Find the next marked object after this one.
1444       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1445     }
1446 
1447     // Mark the allocated-since-marking portion...
1448     HeapWord* top = hr->top();
1449     if (ntams < top) {
1450       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1451       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1452 
1453       // Note: if we're looking at the last region in the heap, top
1454       // could actually be just beyond the end of the heap; end_idx
1455       // will then correspond to a (non-existent) card that is also
1456       // just beyond the heap.
1457       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1458         // top is not card aligned - increment to cover
1459         // all the cards spanned by the [ntams, top) range
1460         end_idx += 1;
1461       }
1462       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1463 
1464       // This definitely means the region has live objects.
1465       set_bit_for_region(hr);
1466     }
1467 
1468     // Update the live region bitmap.
1469     if (marked_bytes > 0) {
1470       set_bit_for_region(hr);
1471     }
1472 
1473     // Set the marked bytes for the current region so that
1474     // it can be queried by a calling verification routine.
1475     _region_marked_bytes = marked_bytes;
1476 
1477     return false;
1478   }
1479 
1480   size_t region_marked_bytes() const { return _region_marked_bytes; }
1481 };
1482 
1483 // Heap region closure used for verifying the counting data
1484 // that was accumulated concurrently and aggregated during
1485 // the remark pause. This closure is applied to the heap
1486 // regions during the STW cleanup pause.
1487 
1488 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1489   G1CollectedHeap* _g1h;
1490   ConcurrentMark* _cm;
1491   CalcLiveObjectsClosure _calc_cl;
1492   BitMap* _region_bm;   // Region BM to be verified
1493   BitMap* _card_bm;     // Card BM to be verified
1494   bool _verbose;        // verbose output?
1495 
1496   BitMap* _exp_region_bm; // Expected Region BM values
1497   BitMap* _exp_card_bm;   // Expected card BM values
1498 
1499   int _failures;
1500 
1501 public:
1502   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1503                                 BitMap* region_bm,
1504                                 BitMap* card_bm,
1505                                 BitMap* exp_region_bm,
1506                                 BitMap* exp_card_bm,
1507                                 bool verbose) :
1508     _g1h(g1h), _cm(g1h->concurrent_mark()),
1509     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1510     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1511     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1512     _failures(0) { }
1513 
1514   int failures() const { return _failures; }
1515 
1516   bool doHeapRegion(HeapRegion* hr) {
1517     if (hr->continuesHumongous()) {
1518       // We will ignore these here and process them when their
1519       // associated "starts humongous" region is processed (see
1520       // set_bit_for_region()). Note that we cannot rely on their
1521       // associated "starts humongous" region to have its bit set to
1522       // 1 since, due to the region chunking in the parallel region
1523       // iteration, a "continues humongous" region might be visited
1524       // before its associated "starts humongous".
1525       return false;
1526     }
1527 
1528     int failures = 0;
1529 
1530     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1531     // this region and set the corresponding bits in the expected region
1532     // and card bitmaps.
1533     bool res = _calc_cl.doHeapRegion(hr);
1534     assert(res == false, "should be continuing");
1535 
1536     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1537                     Mutex::_no_safepoint_check_flag);
1538 
1539     // Verify the marked bytes for this region.
1540     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1541     size_t act_marked_bytes = hr->next_marked_bytes();
1542 
1543     // We're not OK if expected marked bytes > actual marked bytes. It means
1544     // we have missed accounting for some objects during the actual marking.
1545     if (exp_marked_bytes > act_marked_bytes) {
1546       if (_verbose) {
1547         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1548                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1549                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1550       }
1551       failures += 1;
1552     }
1553 
1554     // Verify the bit, for this region, in the actual and expected
1555     // (which was just calculated) region bit maps.
1556     // We're not OK if the bit in the calculated expected region
1557     // bitmap is set and the bit in the actual region bitmap is not.
1558     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1559 
1560     bool expected = _exp_region_bm->at(index);
1561     bool actual = _region_bm->at(index);
1562     if (expected && !actual) {
1563       if (_verbose) {
1564         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1565                                "expected: %s, actual: %s",
1566                                hr->hrs_index(),
1567                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1568       }
1569       failures += 1;
1570     }
1571 
1572     // Verify that the card bit maps for the cards spanned by the current
1573     // region match. We have an error if we have a set bit in the expected
1574     // bit map and the corresponding bit in the actual bitmap is not set.
1575 
1576     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1577     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1578 
1579     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1580       expected = _exp_card_bm->at(i);
1581       actual = _card_bm->at(i);
1582 
1583       if (expected && !actual) {
1584         if (_verbose) {
1585           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1586                                  "expected: %s, actual: %s",
1587                                  hr->hrs_index(), i,
1588                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1589         }
1590         failures += 1;
1591       }
1592     }
1593 
1594     if (failures > 0 && _verbose)  {
1595       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1596                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1597                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1598                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1599     }
1600 
1601     _failures += failures;
1602 
1603     // We could stop iteration over the heap when we
1604     // find the first violating region by returning true.
1605     return false;
1606   }
1607 };
1608 
1609 
1610 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1611 protected:
1612   G1CollectedHeap* _g1h;
1613   ConcurrentMark* _cm;
1614   BitMap* _actual_region_bm;
1615   BitMap* _actual_card_bm;
1616 
1617   uint    _n_workers;
1618 
1619   BitMap* _expected_region_bm;
1620   BitMap* _expected_card_bm;
1621 
1622   int  _failures;
1623   bool _verbose;
1624 
1625 public:
1626   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1627                             BitMap* region_bm, BitMap* card_bm,
1628                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1629     : AbstractGangTask("G1 verify final counting"),
1630       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1631       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1632       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1633       _failures(0), _verbose(false),
1634       _n_workers(0) {
1635     assert(VerifyDuringGC, "don't call this otherwise");
1636 
1637     // Use the value already set as the number of active threads
1638     // in the call to run_task().
1639     if (G1CollectedHeap::use_parallel_gc_threads()) {
1640       assert( _g1h->workers()->active_workers() > 0,
1641         "Should have been previously set");
1642       _n_workers = _g1h->workers()->active_workers();
1643     } else {
1644       _n_workers = 1;
1645     }
1646 
1647     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1648     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1649 
1650     _verbose = _cm->verbose_medium();
1651   }
1652 
1653   void work(uint worker_id) {
1654     assert(worker_id < _n_workers, "invariant");
1655 
1656     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1657                                             _actual_region_bm, _actual_card_bm,
1658                                             _expected_region_bm,
1659                                             _expected_card_bm,
1660                                             _verbose);
1661 
1662     if (G1CollectedHeap::use_parallel_gc_threads()) {
1663       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1664                                             worker_id,
1665                                             _n_workers,
1666                                             HeapRegion::VerifyCountClaimValue);
1667     } else {
1668       _g1h->heap_region_iterate(&verify_cl);
1669     }
1670 
1671     Atomic::add(verify_cl.failures(), &_failures);
1672   }
1673 
1674   int failures() const { return _failures; }
1675 };
1676 
1677 // Closure that finalizes the liveness counting data.
1678 // Used during the cleanup pause.
1679 // Sets the bits corresponding to the interval [NTAMS, top)
1680 // (which contains the implicitly live objects) in the
1681 // card liveness bitmap. Also sets the bit for each region
1682 // containing live data in the region liveness bitmap.
1683 
1684 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1685  public:
1686   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1687                               BitMap* region_bm,
1688                               BitMap* card_bm) :
1689     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1690 
1691   bool doHeapRegion(HeapRegion* hr) {
1692 
1693     if (hr->continuesHumongous()) {
1694       // We will ignore these here and process them when their
1695       // associated "starts humongous" region is processed (see
1696       // set_bit_for_region()). Note that we cannot rely on their
1697       // associated "starts humongous" region to have its bit set to
1698       // 1 since, due to the region chunking in the parallel region
1699       // iteration, a "continues humongous" region might be visited
1700       // before its associated "starts humongous".
1701       return false;
1702     }
1703 
1704     HeapWord* ntams = hr->next_top_at_mark_start();
1705     HeapWord* top   = hr->top();
1706 
1707     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1708 
1709     // Mark the allocated-since-marking portion...
1710     if (ntams < top) {
1711       // This definitely means the region has live objects.
1712       set_bit_for_region(hr);
1713 
1714       // Now set the bits in the card bitmap for [ntams, top)
1715       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1716       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1717 
1718       // Note: if we're looking at the last region in the heap, top
1719       // could actually be just beyond the end of the heap; end_idx
1720       // will then correspond to a (non-existent) card that is also
1721       // just beyond the heap.
1722       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1723         // top is not card aligned - increment to cover
1724         // all the cards spanned by the [ntams, top) range
1725         end_idx += 1;
1726       }
1727 
1728       assert(end_idx <= _card_bm->size(),
1729              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1730                      end_idx, _card_bm->size()));
1731       assert(start_idx < _card_bm->size(),
1732              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1733                      start_idx, _card_bm->size()));
1734 
1735       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1736     }
1737 
1738     // Set the bit for the region if it contains live data
1739     if (hr->next_marked_bytes() > 0) {
1740       set_bit_for_region(hr);
1741     }
1742 
1743     return false;
1744   }
1745 };
1746 
1747 class G1ParFinalCountTask: public AbstractGangTask {
1748 protected:
1749   G1CollectedHeap* _g1h;
1750   ConcurrentMark* _cm;
1751   BitMap* _actual_region_bm;
1752   BitMap* _actual_card_bm;
1753 
1754   uint    _n_workers;
1755 
1756 public:
1757   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1758     : AbstractGangTask("G1 final counting"),
1759       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1760       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1761       _n_workers(0) {
1762     // Use the value already set as the number of active threads
1763     // in the call to run_task().
1764     if (G1CollectedHeap::use_parallel_gc_threads()) {
1765       assert( _g1h->workers()->active_workers() > 0,
1766         "Should have been previously set");
1767       _n_workers = _g1h->workers()->active_workers();
1768     } else {
1769       _n_workers = 1;
1770     }
1771   }
1772 
1773   void work(uint worker_id) {
1774     assert(worker_id < _n_workers, "invariant");
1775 
1776     FinalCountDataUpdateClosure final_update_cl(_g1h,
1777                                                 _actual_region_bm,
1778                                                 _actual_card_bm);
1779 
1780     if (G1CollectedHeap::use_parallel_gc_threads()) {
1781       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1782                                             worker_id,
1783                                             _n_workers,
1784                                             HeapRegion::FinalCountClaimValue);
1785     } else {
1786       _g1h->heap_region_iterate(&final_update_cl);
1787     }
1788   }
1789 };
1790 
1791 class G1ParNoteEndTask;
1792 
1793 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1794   G1CollectedHeap* _g1;
1795   int _worker_num;
1796   size_t _max_live_bytes;
1797   uint _regions_claimed;
1798   size_t _freed_bytes;
1799   FreeRegionList* _local_cleanup_list;
1800   OldRegionSet* _old_proxy_set;
1801   HumongousRegionSet* _humongous_proxy_set;
1802   HRRSCleanupTask* _hrrs_cleanup_task;
1803   double _claimed_region_time;
1804   double _max_region_time;
1805 
1806 public:
1807   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1808                              int worker_num,
1809                              FreeRegionList* local_cleanup_list,
1810                              OldRegionSet* old_proxy_set,
1811                              HumongousRegionSet* humongous_proxy_set,
1812                              HRRSCleanupTask* hrrs_cleanup_task) :
1813     _g1(g1), _worker_num(worker_num),
1814     _max_live_bytes(0), _regions_claimed(0),
1815     _freed_bytes(0),
1816     _claimed_region_time(0.0), _max_region_time(0.0),
1817     _local_cleanup_list(local_cleanup_list),
1818     _old_proxy_set(old_proxy_set),
1819     _humongous_proxy_set(humongous_proxy_set),
1820     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1821 
1822   size_t freed_bytes() { return _freed_bytes; }
1823 
1824   bool doHeapRegion(HeapRegion *hr) {
1825     if (hr->continuesHumongous()) {
1826       return false;
1827     }
1828     // We use a claim value of zero here because all regions
1829     // were claimed with value 1 in the FinalCount task.
1830     _g1->reset_gc_time_stamps(hr);
1831     double start = os::elapsedTime();
1832     _regions_claimed++;
1833     hr->note_end_of_marking();
1834     _max_live_bytes += hr->max_live_bytes();
1835     _g1->free_region_if_empty(hr,
1836                               &_freed_bytes,
1837                               _local_cleanup_list,
1838                               _old_proxy_set,
1839                               _humongous_proxy_set,
1840                               _hrrs_cleanup_task,
1841                               true /* par */);
1842     double region_time = (os::elapsedTime() - start);
1843     _claimed_region_time += region_time;
1844     if (region_time > _max_region_time) {
1845       _max_region_time = region_time;
1846     }
1847     return false;
1848   }
1849 
1850   size_t max_live_bytes() { return _max_live_bytes; }
1851   uint regions_claimed() { return _regions_claimed; }
1852   double claimed_region_time_sec() { return _claimed_region_time; }
1853   double max_region_time_sec() { return _max_region_time; }
1854 };
1855 
1856 class G1ParNoteEndTask: public AbstractGangTask {
1857   friend class G1NoteEndOfConcMarkClosure;
1858 
1859 protected:
1860   G1CollectedHeap* _g1h;
1861   size_t _max_live_bytes;
1862   size_t _freed_bytes;
1863   FreeRegionList* _cleanup_list;
1864 
1865 public:
1866   G1ParNoteEndTask(G1CollectedHeap* g1h,
1867                    FreeRegionList* cleanup_list) :
1868     AbstractGangTask("G1 note end"), _g1h(g1h),
1869     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1870 
1871   void work(uint worker_id) {
1872     double start = os::elapsedTime();
1873     FreeRegionList local_cleanup_list("Local Cleanup List");
1874     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1875     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1876     HRRSCleanupTask hrrs_cleanup_task;
1877     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1878                                            &old_proxy_set,
1879                                            &humongous_proxy_set,
1880                                            &hrrs_cleanup_task);
1881     if (G1CollectedHeap::use_parallel_gc_threads()) {
1882       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1883                                             _g1h->workers()->active_workers(),
1884                                             HeapRegion::NoteEndClaimValue);
1885     } else {
1886       _g1h->heap_region_iterate(&g1_note_end);
1887     }
1888     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1889 
1890     // Now update the lists
1891     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1892                                             NULL /* free_list */,
1893                                             &old_proxy_set,
1894                                             &humongous_proxy_set,
1895                                             true /* par */);
1896     {
1897       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1898       _max_live_bytes += g1_note_end.max_live_bytes();
1899       _freed_bytes += g1_note_end.freed_bytes();
1900 
1901       // If we iterate over the global cleanup list at the end of
1902       // cleanup to do this printing we cannot guarantee that we only
1903       // generate output for the newly-reclaimed regions (the list
1904       // might not be empty at the beginning of cleanup; we might
1905       // still be working on its previous contents). So we do the
1906       // printing here, before we append the new regions to the global
1907       // cleanup list.
1908 
1909       G1HRPrinter* hr_printer = _g1h->hr_printer();
1910       if (hr_printer->is_active()) {
1911         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1912         while (iter.more_available()) {
1913           HeapRegion* hr = iter.get_next();
1914           hr_printer->cleanup(hr);
1915         }
1916       }
1917 
1918       _cleanup_list->add_as_tail(&local_cleanup_list);
1919       assert(local_cleanup_list.is_empty(), "post-condition");
1920 
1921       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1922     }
1923   }
1924   size_t max_live_bytes() { return _max_live_bytes; }
1925   size_t freed_bytes() { return _freed_bytes; }
1926 };
1927 
1928 class G1ParScrubRemSetTask: public AbstractGangTask {
1929 protected:
1930   G1RemSet* _g1rs;
1931   BitMap* _region_bm;
1932   BitMap* _card_bm;
1933 public:
1934   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1935                        BitMap* region_bm, BitMap* card_bm) :
1936     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1937     _region_bm(region_bm), _card_bm(card_bm) { }
1938 
1939   void work(uint worker_id) {
1940     if (G1CollectedHeap::use_parallel_gc_threads()) {
1941       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1942                        HeapRegion::ScrubRemSetClaimValue);
1943     } else {
1944       _g1rs->scrub(_region_bm, _card_bm);
1945     }
1946   }
1947 
1948 };
1949 
1950 void ConcurrentMark::cleanup() {
1951   // world is stopped at this checkpoint
1952   assert(SafepointSynchronize::is_at_safepoint(),
1953          "world should be stopped");
1954   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1955 
1956   // If a full collection has happened, we shouldn't do this.
1957   if (has_aborted()) {
1958     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1959     return;
1960   }
1961 
1962   HRSPhaseSetter x(HRSPhaseCleanup);
1963   g1h->verify_region_sets_optional();
1964 
1965   if (VerifyDuringGC) {
1966     HandleMark hm;  // handle scope
1967     gclog_or_tty->print(" VerifyDuringGC:(before)");
1968     Universe::heap()->prepare_for_verify();
1969     Universe::verify(/* silent */ false,
1970                      /* option */ VerifyOption_G1UsePrevMarking);
1971   }
1972 
1973   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1974   g1p->record_concurrent_mark_cleanup_start();
1975 
1976   double start = os::elapsedTime();
1977 
1978   HeapRegionRemSet::reset_for_cleanup_tasks();
1979 
1980   uint n_workers;
1981 
1982   // Do counting once more with the world stopped for good measure.
1983   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1984 
1985   if (G1CollectedHeap::use_parallel_gc_threads()) {
1986     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1987            "sanity check");
1988 
1989     g1h->set_par_threads();
1990     n_workers = g1h->n_par_threads();
1991     assert(g1h->n_par_threads() == n_workers,
1992            "Should not have been reset");
1993     g1h->workers()->run_task(&g1_par_count_task);
1994     // Done with the parallel phase so reset to 0.
1995     g1h->set_par_threads(0);
1996 
1997     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1998            "sanity check");
1999   } else {
2000     n_workers = 1;
2001     g1_par_count_task.work(0);
2002   }
2003 
2004   if (VerifyDuringGC) {
2005     // Verify that the counting data accumulated during marking matches
2006     // that calculated by walking the marking bitmap.
2007 
2008     // Bitmaps to hold expected values
2009     BitMap expected_region_bm(_region_bm.size(), false);
2010     BitMap expected_card_bm(_card_bm.size(), false);
2011 
2012     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2013                                                  &_region_bm,
2014                                                  &_card_bm,
2015                                                  &expected_region_bm,
2016                                                  &expected_card_bm);
2017 
2018     if (G1CollectedHeap::use_parallel_gc_threads()) {
2019       g1h->set_par_threads((int)n_workers);
2020       g1h->workers()->run_task(&g1_par_verify_task);
2021       // Done with the parallel phase so reset to 0.
2022       g1h->set_par_threads(0);
2023 
2024       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2025              "sanity check");
2026     } else {
2027       g1_par_verify_task.work(0);
2028     }
2029 
2030     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2031   }
2032 
2033   size_t start_used_bytes = g1h->used();
2034   g1h->set_marking_complete();
2035 
2036   double count_end = os::elapsedTime();
2037   double this_final_counting_time = (count_end - start);
2038   _total_counting_time += this_final_counting_time;
2039 
2040   if (G1PrintRegionLivenessInfo) {
2041     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2042     _g1h->heap_region_iterate(&cl);
2043   }
2044 
2045   // Install the newly completed mark bitmap as "prev".
2046   swapMarkBitMaps();
2047 
2048   g1h->reset_gc_time_stamp();
2049 
2050   // Note end of marking in all heap regions.
2051   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2052   if (G1CollectedHeap::use_parallel_gc_threads()) {
2053     g1h->set_par_threads((int)n_workers);
2054     g1h->workers()->run_task(&g1_par_note_end_task);
2055     g1h->set_par_threads(0);
2056 
2057     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2058            "sanity check");
2059   } else {
2060     g1_par_note_end_task.work(0);
2061   }
2062   g1h->check_gc_time_stamps();
2063 
2064   if (!cleanup_list_is_empty()) {
2065     // The cleanup list is not empty, so we'll have to process it
2066     // concurrently. Notify anyone else that might be wanting free
2067     // regions that there will be more free regions coming soon.
2068     g1h->set_free_regions_coming();
2069   }
2070 
2071   // Scrub the remembered sets, if enabled, before the cleanup-end call
2072   // below, since scrubbing affects the metric by which we sort the heap regions.
2073   if (G1ScrubRemSets) {
2074     double rs_scrub_start = os::elapsedTime();
2075     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2076     if (G1CollectedHeap::use_parallel_gc_threads()) {
2077       g1h->set_par_threads((int)n_workers);
2078       g1h->workers()->run_task(&g1_par_scrub_rs_task);
2079       g1h->set_par_threads(0);
2080 
2081       assert(g1h->check_heap_region_claim_values(
2082                                             HeapRegion::ScrubRemSetClaimValue),
2083              "sanity check");
2084     } else {
2085       g1_par_scrub_rs_task.work(0);
2086     }
2087 
2088     double rs_scrub_end = os::elapsedTime();
2089     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2090     _total_rs_scrub_time += this_rs_scrub_time;
2091   }
2092 
2093   // this will also free any regions totally full of garbage objects,
2094   // and sort the regions.
2095   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2096 
2097   // Statistics.
2098   double end = os::elapsedTime();
2099   _cleanup_times.add((end - start) * 1000.0);
2100 
2101   if (G1Log::fine()) {
2102     g1h->print_size_transition(gclog_or_tty,
2103                                start_used_bytes,
2104                                g1h->used(),
2105                                g1h->capacity());
2106   }
2107 
2108   // Clean up will have freed any regions completely full of garbage.
2109   // Update the soft reference policy with the new heap occupancy.
2110   Universe::update_heap_info_at_gc();
2111 
2112   // We need to make this be a "collection" so any collection pause that
2113   // races with it goes around and waits for completeCleanup to finish.
2114   g1h->increment_total_collections();
2115 
2116   // We reclaimed old regions so we should calculate the sizes to make
2117   // sure we update the old gen/space data.
2118   g1h->g1mm()->update_sizes();
2119 
2120   if (VerifyDuringGC) {
2121     HandleMark hm;  // handle scope
2122     gclog_or_tty->print(" VerifyDuringGC:(after)");
2123     Universe::heap()->prepare_for_verify();
2124     Universe::verify(/* silent */ false,
2125                      /* option */ VerifyOption_G1UsePrevMarking);
2126   }
2127 
2128   g1h->verify_region_sets_optional();
2129 }
2130 
2131 void ConcurrentMark::completeCleanup() {
2132   if (has_aborted()) return;
2133 
2134   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2135 
2136   _cleanup_list.verify_optional();
2137   FreeRegionList tmp_free_list("Tmp Free List");
2138 
2139   if (G1ConcRegionFreeingVerbose) {
2140     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2141                            "cleanup list has %u entries",
2142                            _cleanup_list.length());
2143   }
2144 
2145   // No one else should be accessing the _cleanup_list at this point,
2146   // so it's not necessary to take any locks.
2147   while (!_cleanup_list.is_empty()) {
2148     HeapRegion* hr = _cleanup_list.remove_head();
2149     assert(hr != NULL, "the list was not empty");
2150     hr->par_clear();
2151     tmp_free_list.add_as_tail(hr);
2152 
2153     // Instead of adding one region at a time to the secondary_free_list,
2154     // we accumulate them in the local list and move them a few at a
2155     // time. This also cuts down on the number of notify_all() calls
2156     // we do during this process. We'll also append the local list when
2157     // _cleanup_list is empty (which means we just removed the last
2158     // region from the _cleanup_list).
2159     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2160         _cleanup_list.is_empty()) {
2161       if (G1ConcRegionFreeingVerbose) {
2162         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2163                                "appending %u entries to the secondary_free_list, "
2164                                "cleanup list still has %u entries",
2165                                tmp_free_list.length(),
2166                                _cleanup_list.length());
2167       }
2168 
2169       {
2170         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2171         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2172         SecondaryFreeList_lock->notify_all();
2173       }
2174 
2175       if (G1StressConcRegionFreeing) {
2176         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2177           os::sleep(Thread::current(), (jlong) 1, false);
2178         }
2179       }
2180     }
2181   }
2182   assert(tmp_free_list.is_empty(), "post-condition");
2183 }
2184 
2185 // Supporting Object and Oop closures for reference discovery
2186 // and processing during marking.
2187 
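// Note: an object is treated as alive here if it lies outside the G1
// reserved heap, or if it is not "ill" with respect to the in-progress
// (next) marking - i.e. it is either marked in the next bitmap or was
// allocated since the marking cycle started (above the region's NTAMS).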
2188 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2189   HeapWord* addr = (HeapWord*)obj;
2190   return addr != NULL &&
2191          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2192 }
2193 
2194 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2195 // Uses the CMTask associated with a worker thread (for serial reference
2196 // processing the CMTask for worker 0 is used) to preserve (mark) and
2197 // trace referent objects.
2198 //
2199 // Using the CMTask and embedded local queues avoids having the worker
2200 // threads operating on the global mark stack. This reduces the risk
2201 // of overflowing the stack - which we would rather avoid at this late
2202 // stage. Also, using the tasks' local queues removes the potential
2203 // for the workers to interfere with each other, which could occur if
2204 // they operated on the global stack.
2205 
2206 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2207   ConcurrentMark* _cm;
2208   CMTask*         _task;
2209   int             _ref_counter_limit;
2210   int             _ref_counter;
2211   bool            _is_serial;
2212  public:
2213   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2214     _cm(cm), _task(task), _is_serial(is_serial),
2215     _ref_counter_limit(G1RefProcDrainInterval) {
2216     assert(_ref_counter_limit > 0, "sanity");
2217     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2218     _ref_counter = _ref_counter_limit;
2219   }
2220 
2221   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2222   virtual void do_oop(      oop* p) { do_oop_work(p); }
2223 
2224   template <class T> void do_oop_work(T* p) {
2225     if (!_cm->has_overflown()) {
2226       oop obj = oopDesc::load_decode_heap_oop(p);
2227       if (_cm->verbose_high()) {
2228         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2229                                "*"PTR_FORMAT" = "PTR_FORMAT,
2230                                _task->worker_id(), p, (void*) obj);
2231       }
2232 
2233       _task->deal_with_reference(obj);
2234       _ref_counter--;
2235 
2236       if (_ref_counter == 0) {
2237         // We have dealt with _ref_counter_limit references, pushing them
2238         // and objects reachable from them on to the local stack (and
2239         // possibly the global stack). Call CMTask::do_marking_step() to
2240         // process these entries.
2241         //
2242         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2243         // there's nothing more to do (i.e. we're done with the entries that
2244         // were pushed as a result of the CMTask::deal_with_reference() calls
2245         // above) or we overflow.
2246         //
2247         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2248         // flag while there may still be some work to do. (See the comment at
2249         // the beginning of CMTask::do_marking_step() for those conditions -
2250         // one of which is reaching the specified time target.) It is only
2251         // when CMTask::do_marking_step() returns without setting the
2252         // has_aborted() flag that the marking step has completed.
2253         do {
2254           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2255           _task->do_marking_step(mark_step_duration_ms,
2256                                  false      /* do_stealing    */,
2257                                  false      /* do_termination */,
2258                                  _is_serial /* is_serial      */);
2259         } while (_task->has_aborted() && !_cm->has_overflown());
2260         _ref_counter = _ref_counter_limit;
2261       }
2262     } else {
2263       if (_cm->verbose_high()) {
2264          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2265       }
2266     }
2267   }
2268 };
2269 
2270 // 'Drain' oop closure used by both serial and parallel reference processing.
2271 // Uses the CMTask associated with a given worker thread (for serial
2272 // reference processing the CMTask for worker 0 is used). Calls the
2273 // do_marking_step routine, with an unbelievably large timeout value,
2274 // to drain the marking data structures of the remaining entries
2275 // added by the 'keep alive' oop closure above.
2276 
2277 class G1CMDrainMarkingStackClosure: public VoidClosure {
2278   ConcurrentMark* _cm;
2279   CMTask*         _task;
2280   bool            _is_serial;
2281  public:
2282   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2283     _cm(cm), _task(task), _is_serial(is_serial) {
2284     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2285   }
2286 
2287   void do_void() {
2288     do {
2289       if (_cm->verbose_high()) {
2290         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2291                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2292       }
2293 
2294       // We call CMTask::do_marking_step() to completely drain the local
2295       // and global marking stacks of entries pushed by the 'keep alive'
2296       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2297       //
2298       // CMTask::do_marking_step() is called in a loop, which we'll exit
2299       // if there's nothing more to do (i.e. we've completely drained the
2300       // entries that were pushed as a result of applying the 'keep alive'
2301       // closure to the entries on the discovered ref lists) or we overflow
2302       // the global marking stack.
2303       //
2304       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2305       // flag while there may still be some work to do. (See the comment at
2306       // the beginning of CMTask::do_marking_step() for those conditions -
2307       // one of which is reaching the specified time target.) It is only
2308       // when CMTask::do_marking_step() returns without setting the
2309       // has_aborted() flag that the marking step has completed.
2310 
2311       // It only makes sense to allow stealing in CMTask::do_marking_step()
2312       // if this closure is being instantiated for parallel reference
2313       // processing.
2314 
2315       _task->do_marking_step(1000000000.0 /* something very large */,
2316                              !_is_serial /* do_stealing    */,
2317                              true        /* do_termination */,
2318                              _is_serial  /* is_serial      */);
2319     } while (_task->has_aborted() && !_cm->has_overflown());
2320   }
2321 };
2322 
2323 // Implementation of AbstractRefProcTaskExecutor for parallel
2324 // reference processing at the end of G1 concurrent marking
2325 
2326 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2327 private:
2328   G1CollectedHeap* _g1h;
2329   ConcurrentMark*  _cm;
2330   WorkGang*        _workers;
2331   int              _active_workers;
2332 
2333 public:
2334   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2335                         ConcurrentMark* cm,
2336                         WorkGang* workers,
2337                         int n_workers) :
2338     _g1h(g1h), _cm(cm),
2339     _workers(workers), _active_workers(n_workers) { }
2340 
2341   // Executes the given task using concurrent marking worker threads.
2342   virtual void execute(ProcessTask& task);
2343   virtual void execute(EnqueueTask& task);
2344 };
2345 
2346 class G1CMRefProcTaskProxy: public AbstractGangTask {
2347   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2348   ProcessTask&     _proc_task;
2349   G1CollectedHeap* _g1h;
2350   ConcurrentMark*  _cm;
2351 
2352 public:
2353   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2354                      G1CollectedHeap* g1h,
2355                      ConcurrentMark* cm) :
2356     AbstractGangTask("Process reference objects in parallel"),
2357     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2358     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2359     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2360   }
2361 
2362   virtual void work(uint worker_id) {
2363     CMTask* task = _cm->task(worker_id);
2364     G1CMIsAliveClosure g1_is_alive(_g1h);
2365     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2366     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2367 
2368     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2369   }
2370 };
2371 
2372 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2373   assert(_workers != NULL, "Need parallel worker threads.");
2374   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2375 
2376   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2377 
2378   // We need to reset the concurrency level before each
2379   // proxy task execution, so that the termination protocol
2380   // and overflow handling in CMTask::do_marking_step() knows
2381   // how many workers to wait for.
2382   _cm->set_concurrency(_active_workers);
2383   _g1h->set_par_threads(_active_workers);
2384   _workers->run_task(&proc_task_proxy);
2385   _g1h->set_par_threads(0);
2386 }
2387 
2388 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2389   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2390   EnqueueTask& _enq_task;
2391 
2392 public:
2393   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2394     AbstractGangTask("Enqueue reference objects in parallel"),
2395     _enq_task(enq_task) { }
2396 
2397   virtual void work(uint worker_id) {
2398     _enq_task.work(worker_id);
2399   }
2400 };
2401 
2402 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2403   assert(_workers != NULL, "Need parallel worker threads.");
2404   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2405 
2406   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2407 
2408   // Not strictly necessary but...
2409   //
2410   // We need to reset the concurrency level before each
2411   // proxy task execution, so that the termination protocol
2412   // and overflow handling in CMTask::do_marking_step() knows
2413   // how many workers to wait for.
2414   _cm->set_concurrency(_active_workers);
2415   _g1h->set_par_threads(_active_workers);
2416   _workers->run_task(&enq_task_proxy);
2417   _g1h->set_par_threads(0);
2418 }
2419 
2420 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2421   if (has_overflown()) {
2422     // Skip processing the discovered references if we have
2423     // overflown the global marking stack. Reference objects
2424     // only get discovered once so it is OK to not
2425     // de-populate the discovered reference lists. We could have,
2426     // but the only benefit would be that, when marking restarts,
2427     // less reference objects are discovered.
2428     return;
2429   }
2430 
2431   ResourceMark rm;
2432   HandleMark   hm;
2433 
2434   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2435 
2436   // Is alive closure.
2437   G1CMIsAliveClosure g1_is_alive(g1h);
2438 
2439   // Inner scope to exclude the cleaning of the string and symbol
2440   // tables from the displayed time.
2441   {
2442     if (G1Log::finer()) {
2443       gclog_or_tty->put(' ');
2444     }
2445     TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2446 
2447     ReferenceProcessor* rp = g1h->ref_processor_cm();
2448 
2449     // See the comment in G1CollectedHeap::ref_processing_init()
2450     // about how reference processing currently works in G1.
2451 
2452     // Set the soft reference policy
2453     rp->setup_policy(clear_all_soft_refs);
2454     assert(_markStack.isEmpty(), "mark stack should be empty");
2455 
2456     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2457     // in serial reference processing. Note these closures are also
2458     // used for serially processing (by the current thread) the
2459     // JNI references during parallel reference processing.
2460     //
2461     // These closures do not need to synchronize with the worker
2462     // threads involved in parallel reference processing as these
2463     // instances are executed serially by the current thread (e.g.
2464     // reference processing is not multi-threaded and is thus
2465     // performed by the current thread instead of a gang worker).
2466     //
2467     // The gang tasks involved in parallel reference processing create
2468     // their own instances of these closures, which do their own
2469     // synchronization among themselves.
2470     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2471     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2472 
2473     // We need at least one active thread. If reference processing
2474     // is not multi-threaded we use the current (VMThread) thread,
2475     // otherwise we use the work gang from the G1CollectedHeap and
2476     // we utilize all the worker threads we can.
2477     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2478     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
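    // Clamp to the range [1, _max_worker_id] so that reference processing
    // never uses more workers than there are CMTasks available.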
2479     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2480 
2481     // Parallel processing task executor.
2482     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2483                                               g1h->workers(), active_workers);
2484     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2485 
2486     // Set the concurrency level. The phase was already set prior to
2487     // executing the remark task.
2488     set_concurrency(active_workers);
2489 
2490     // Set the degree of MT processing here.  If the discovery was done MT,
2491     // the number of threads involved during discovery could differ from
2492     // the number of active workers.  This is OK as long as the discovered
2493     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2494     rp->set_active_mt_degree(active_workers);
2495 
2496     // Process the weak references.
2497     rp->process_discovered_references(&g1_is_alive,
2498                                       &g1_keep_alive,
2499                                       &g1_drain_mark_stack,
2500                                       executor);
2501 
2502     // The do_oop work routines of the keep_alive and drain_marking_stack
2503     // oop closures will set the has_overflown flag if we overflow the
2504     // global marking stack.
2505 
2506     assert(_markStack.overflow() || _markStack.isEmpty(),
2507             "mark stack should be empty (unless it overflowed)");
2508 
2509     if (_markStack.overflow()) {
2510       // This should have been done already when we tried to push an
2511       // entry on to the global mark stack. But let's do it again.
2512       set_has_overflown();
2513     }
2514 
2515     assert(rp->num_q() == active_workers, "why not");
2516 
2517     rp->enqueue_discovered_references(executor);
2518 
2519     rp->verify_no_references_recorded();
2520     assert(!rp->discovery_enabled(), "Post condition");
2521   }
2522 
2523   // Now clean up stale oops in StringTable
2524   StringTable::unlink(&g1_is_alive);
2525   // Clean up unreferenced symbols in symbol table.
2526   SymbolTable::unlink();
2527 }
2528 
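// Exchange the roles of the two marking bitmaps: the "next" bitmap, which
// now holds the result of the just-completed marking, becomes the "prev"
// bitmap used for subsequent liveness queries, while the old "prev" bitmap
// will be cleared and reused as "next" for the following marking cycle.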
2529 void ConcurrentMark::swapMarkBitMaps() {
2530   CMBitMapRO* temp = _prevMarkBitMap;
2531   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2532   _nextMarkBitMap  = (CMBitMap*)  temp;
2533 }
2534 
2535 class CMRemarkTask: public AbstractGangTask {
2536 private:
2537   ConcurrentMark* _cm;
2538   bool            _is_serial;
2539 public:
2540   void work(uint worker_id) {
2541     // Since all available tasks are actually started, we should
2542     // only proceed if we're supposed to be active.
2543     if (worker_id < _cm->active_tasks()) {
2544       CMTask* task = _cm->task(worker_id);
2545       task->record_start_time();
2546       do {
2547         task->do_marking_step(1000000000.0 /* something very large */,
2548                               true /* do_stealing    */,
2549                               true /* do_termination */,
2550                               _is_serial);
2551       } while (task->has_aborted() && !_cm->has_overflown());
2552       // If we overflow, then we do not want to restart. We instead
2553       // want to abort remark and do concurrent marking again.
2554       task->record_end_time();
2555     }
2556   }
2557 
2558   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2559     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2560     _cm->terminator()->reset_for_reuse(active_workers);
2561   }
2562 };
2563 
2564 void ConcurrentMark::checkpointRootsFinalWork() {
2565   ResourceMark rm;
2566   HandleMark   hm;
2567   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2568 
2569   g1h->ensure_parsability(false);
2570 
2571   if (G1CollectedHeap::use_parallel_gc_threads()) {
2572     G1CollectedHeap::StrongRootsScope srs(g1h);
2573     // this is remark, so we'll use up all active threads
2574     uint active_workers = g1h->workers()->active_workers();
2575     if (active_workers == 0) {
2576       assert(active_workers > 0, "Should have been set earlier");
2577       active_workers = (uint) ParallelGCThreads;
2578       g1h->workers()->set_active_workers(active_workers);
2579     }
2580     set_concurrency_and_phase(active_workers, false /* concurrent */);
2581     // Leave _parallel_marking_threads at its
2582     // value originally calculated in the ConcurrentMark
2583     // constructor and pass values of the active workers
2584     // through the gang in the task.
2585 
2586     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2587     // We will start all available threads, even if we decide that the
2588     // active_workers will be fewer. The extra ones will just bail out
2589     // immediately.
2590     g1h->set_par_threads(active_workers);
2591     g1h->workers()->run_task(&remarkTask);
2592     g1h->set_par_threads(0);
2593   } else {
2594     G1CollectedHeap::StrongRootsScope srs(g1h);
2595     uint active_workers = 1;
2596     set_concurrency_and_phase(active_workers, false /* concurrent */);
2597 
2598     // Note - if there's no work gang then the VMThread will be
2599     // the thread to execute the remark - serially. We have
2600     // to pass true for the is_serial parameter so that
2601     // CMTask::do_marking_step() doesn't enter the sync
2602     // barriers in the event of an overflow. Doing so will
2603     // cause an assert that the current thread is not a
2604     // concurrent GC thread.
2605     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2606     remarkTask.work(0);
2607   }
2608   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2609   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2610 
2611   print_stats();
2612 
2613 #if VERIFY_OBJS_PROCESSED
2614   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2615     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2616                            _scan_obj_cl.objs_processed,
2617                            ThreadLocalObjQueue::objs_enqueued);
2618     guarantee(_scan_obj_cl.objs_processed ==
2619               ThreadLocalObjQueue::objs_enqueued,
2620               "Different number of objs processed and enqueued.");
2621   }
2622 #endif
2623 }
2624 
2625 #ifndef PRODUCT
2626 
2627 class PrintReachableOopClosure: public OopClosure {
2628 private:
2629   G1CollectedHeap* _g1h;
2630   outputStream*    _out;
2631   VerifyOption     _vo;
2632   bool             _all;
2633 
2634 public:
2635   PrintReachableOopClosure(outputStream* out,
2636                            VerifyOption  vo,
2637                            bool          all) :
2638     _g1h(G1CollectedHeap::heap()),
2639     _out(out), _vo(vo), _all(all) { }
2640 
2641   void do_oop(narrowOop* p) { do_oop_work(p); }
2642   void do_oop(      oop* p) { do_oop_work(p); }
2643 
2644   template <class T> void do_oop_work(T* p) {
2645     oop         obj = oopDesc::load_decode_heap_oop(p);
2646     const char* str = NULL;
2647     const char* str2 = "";
2648 
2649     if (obj == NULL) {
2650       str = "";
2651     } else if (!_g1h->is_in_g1_reserved(obj)) {
2652       str = " O";
2653     } else {
2654       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2655       guarantee(hr != NULL, "invariant");
2656       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2657       bool marked = _g1h->is_marked(obj, _vo);
2658 
2659       if (over_tams) {
2660         str = " >";
2661         if (marked) {
2662           str2 = " AND MARKED";
2663         }
2664       } else if (marked) {
2665         str = " M";
2666       } else {
2667         str = " NOT";
2668       }
2669     }
2670 
2671     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2672                    p, (void*) obj, str, str2);
2673   }
2674 };
2675 
2676 class PrintReachableObjectClosure : public ObjectClosure {
2677 private:
2678   G1CollectedHeap* _g1h;
2679   outputStream*    _out;
2680   VerifyOption     _vo;
2681   bool             _all;
2682   HeapRegion*      _hr;
2683 
2684 public:
2685   PrintReachableObjectClosure(outputStream* out,
2686                               VerifyOption  vo,
2687                               bool          all,
2688                               HeapRegion*   hr) :
2689     _g1h(G1CollectedHeap::heap()),
2690     _out(out), _vo(vo), _all(all), _hr(hr) { }
2691 
2692   void do_object(oop o) {
2693     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2694     bool marked = _g1h->is_marked(o, _vo);
2695     bool print_it = _all || over_tams || marked;
2696 
2697     if (print_it) {
2698       _out->print_cr(" "PTR_FORMAT"%s",
2699                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2700       PrintReachableOopClosure oopCl(_out, _vo, _all);
2701       o->oop_iterate_no_header(&oopCl);
2702     }
2703   }
2704 };
2705 
2706 class PrintReachableRegionClosure : public HeapRegionClosure {
2707 private:
2708   G1CollectedHeap* _g1h;
2709   outputStream*    _out;
2710   VerifyOption     _vo;
2711   bool             _all;
2712 
2713 public:
2714   bool doHeapRegion(HeapRegion* hr) {
2715     HeapWord* b = hr->bottom();
2716     HeapWord* e = hr->end();
2717     HeapWord* t = hr->top();
2718     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2719     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2720                    "TAMS: "PTR_FORMAT, b, e, t, p);
2721     _out->cr();
2722 
2723     HeapWord* from = b;
2724     HeapWord* to   = t;
2725 
2726     if (to > from) {
2727       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2728       _out->cr();
2729       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2730       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2731       _out->cr();
2732     }
2733 
2734     return false;
2735   }
2736 
2737   PrintReachableRegionClosure(outputStream* out,
2738                               VerifyOption  vo,
2739                               bool          all) :
2740     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2741 };
2742 
2743 void ConcurrentMark::print_reachable(const char* str,
2744                                      VerifyOption vo,
2745                                      bool all) {
2746   gclog_or_tty->cr();
2747   gclog_or_tty->print_cr("== Doing heap dump... ");
2748 
2749   if (G1PrintReachableBaseFile == NULL) {
2750     gclog_or_tty->print_cr("  #### error: no base file defined");
2751     return;
2752   }
2753 
2754   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2755       (JVM_MAXPATHLEN - 1)) {
2756     gclog_or_tty->print_cr("  #### error: file name too long");
2757     return;
2758   }
2759 
2760   char file_name[JVM_MAXPATHLEN];
2761   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2762   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2763 
2764   fileStream fout(file_name);
2765   if (!fout.is_open()) {
2766     gclog_or_tty->print_cr("  #### error: could not open file");
2767     return;
2768   }
2769 
2770   outputStream* out = &fout;
2771   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2772   out->cr();
2773 
2774   out->print_cr("--- ITERATING OVER REGIONS");
2775   out->cr();
2776   PrintReachableRegionClosure rcl(out, vo, all);
2777   _g1h->heap_region_iterate(&rcl);
2778   out->cr();
2779 
2780   gclog_or_tty->print_cr("  done");
2781   gclog_or_tty->flush();
2782 }
2783 
2784 #endif // PRODUCT
2785 
2786 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2787   // Note we are overriding the read-only view of the prev map here, via
2788   // the cast.
2789   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2790 }
2791 
2792 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2793   _nextMarkBitMap->clearRange(mr);
2794 }
2795 
2796 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2797   clearRangePrevBitmap(mr);
2798   clearRangeNextBitmap(mr);
2799 }
2800 
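// Claims the next region to scan by advancing the global finger to the
// end of the region it currently points to with a CAS. Returns the
// claimed region if it has something to scan, or NULL if the claimed
// region turned out to be empty (the caller should then retry) or the
// finger has reached the end of the heap.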
2801 HeapRegion*
2802 ConcurrentMark::claim_region(uint worker_id) {
2803   // "checkpoint" the finger
2804   HeapWord* finger = _finger;
2805 
2806   // _heap_end will not change underneath our feet; it only changes at
2807   // yield points.
2808   while (finger < _heap_end) {
2809     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2810 
2811     // Note on how this code handles humongous regions. In the
2812     // normal case the finger will reach the start of a "starts
2813     // humongous" (SH) region. Its end will either be the end of the
2814     // last "continues humongous" (CH) region in the sequence, or the
2815     // standard end of the SH region (if the SH is the only region in
2816     // the sequence). That way claim_region() will skip over the CH
2817     // regions. However, there is a subtle race between a CM thread
2818     // executing this method and a mutator thread doing a humongous
2819     // object allocation. The two are not mutually exclusive as the CM
2820     // thread does not need to hold the Heap_lock when it gets
2821     // here. So there is a chance that claim_region() will come across
2822     // a free region that's in the process of becoming a SH or a CH
2823     // region. In the former case, it will either
2824     //   a) Miss the update to the region's end, in which case it will
2825     //      visit every subsequent CH region, will find their bitmaps
2826     //      empty, and do nothing, or
2827     //   b) Observe the update to the region's end (in which case
2828     //      it will skip the subsequent CH regions).
2829     // If it comes across a region that suddenly becomes CH, the
2830     // scenario will be similar to b). So, the race between
2831     // claim_region() and a humongous object allocation might force us
2832     // to do a bit of unnecessary work (due to some unnecessary bitmap
2833     // iterations) but it should not introduce any correctness issues.
2834     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2835     HeapWord*   bottom        = curr_region->bottom();
2836     HeapWord*   end           = curr_region->end();
2837     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2838 
2839     if (verbose_low()) {
2840       gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2841                              "["PTR_FORMAT", "PTR_FORMAT"), "
2842                              "limit = "PTR_FORMAT,
2843                              worker_id, curr_region, bottom, end, limit);
2844     }
2845 
2846     // Is the gap between reading the finger and doing the CAS too long?
2847     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2848     if (res == finger) {
2849       // we succeeded
2850 
2851       // notice that _finger == end cannot be guaranteed here since
2852       // someone else might have moved the finger even further
2853       assert(_finger >= end, "the finger should have moved forward");
2854 
2855       if (verbose_low()) {
2856         gclog_or_tty->print_cr("[%u] we were successful with region = "
2857                                PTR_FORMAT, worker_id, curr_region);
2858       }
2859 
2860       if (limit > bottom) {
2861         if (verbose_low()) {
2862           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2863                                  "returning it ", worker_id, curr_region);
2864         }
2865         return curr_region;
2866       } else {
2867         assert(limit == bottom,
2868                "the region limit should be at bottom");
2869         if (verbose_low()) {
2870           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2871                                  "returning NULL", worker_id, curr_region);
2872         }
2873         // we return NULL and the caller should try calling
2874         // claim_region() again.
2875         return NULL;
2876       }
2877     } else {
2878       assert(_finger > finger, "the finger should have moved forward");
2879       if (verbose_low()) {
2880         gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2881                                "global finger = "PTR_FORMAT", "
2882                                "our finger = "PTR_FORMAT,
2883                                worker_id, _finger, finger);
2884       }
2885 
2886       // read it again
2887       finger = _finger;
2888     }
2889   }
2890 
2891   return NULL;
2892 }
2893 
2894 #ifndef PRODUCT
2895 enum VerifyNoCSetOopsPhase {
2896   VerifyNoCSetOopsStack,
2897   VerifyNoCSetOopsQueues,
2898   VerifyNoCSetOopsSATBCompleted,
2899   VerifyNoCSetOopsSATBThread
2900 };
2901 
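// Closure used by the verification code below to check that no object
// it is applied to (whether referenced from the marking stacks and task
// queues, or recorded in an SATB buffer) is in the collection set. The
// phase and info fields only serve to make the failure message more
// precise.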
2902 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2903 private:
2904   G1CollectedHeap* _g1h;
2905   VerifyNoCSetOopsPhase _phase;
2906   int _info;
2907 
2908   const char* phase_str() {
2909     switch (_phase) {
2910     case VerifyNoCSetOopsStack:         return "Stack";
2911     case VerifyNoCSetOopsQueues:        return "Queue";
2912     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2913     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2914     default:                            ShouldNotReachHere();
2915     }
2916     return NULL;
2917   }
2918 
2919   void do_object_work(oop obj) {
2920     guarantee(!_g1h->obj_in_cs(obj),
2921               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2922                       (void*) obj, phase_str(), _info));
2923   }
2924 
2925 public:
2926   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2927 
2928   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2929     _phase = phase;
2930     _info = info;
2931   }
2932 
2933   virtual void do_oop(oop* p) {
2934     oop obj = oopDesc::load_decode_heap_oop(p);
2935     do_object_work(obj);
2936   }
2937 
2938   virtual void do_oop(narrowOop* p) {
2939     // We should not come across narrow oops while scanning marking
2940     // stacks and SATB buffers.
2941     ShouldNotReachHere();
2942   }
2943 
2944   virtual void do_object(oop obj) {
2945     do_object_work(obj);
2946   }
2947 };
2948 
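// Verifies, at a safepoint and only in non-product builds, that the
// marking data structures do not refer to the collection set: the
// entries on the global mark stack and the task queues, the oops in the
// enqueued and per-thread SATB buffers, and the regions that the global
// and per-task fingers point into. Each class of check can be enabled
// individually through the boolean parameters.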
2949 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2950                                          bool verify_enqueued_buffers,
2951                                          bool verify_thread_buffers,
2952                                          bool verify_fingers) {
2953   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2954   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2955     return;
2956   }
2957 
2958   VerifyNoCSetOopsClosure cl;
2959 
2960   if (verify_stacks) {
2961     // Verify entries on the global mark stack
2962     cl.set_phase(VerifyNoCSetOopsStack);
2963     _markStack.oops_do(&cl);
2964 
2965     // Verify entries on the task queues
2966     for (uint i = 0; i < _max_worker_id; i += 1) {
2967       cl.set_phase(VerifyNoCSetOopsQueues, i);
2968       CMTaskQueue* queue = _task_queues->queue(i);
2969       queue->oops_do(&cl);
2970     }
2971   }
2972 
2973   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2974 
2975   // Verify entries on the enqueued SATB buffers
2976   if (verify_enqueued_buffers) {
2977     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2978     satb_qs.iterate_completed_buffers_read_only(&cl);
2979   }
2980 
2981   // Verify entries on the per-thread SATB buffers
2982   if (verify_thread_buffers) {
2983     cl.set_phase(VerifyNoCSetOopsSATBThread);
2984     satb_qs.iterate_thread_buffers_read_only(&cl);
2985   }
2986 
2987   if (verify_fingers) {
2988     // Verify the global finger
2989     HeapWord* global_finger = finger();
2990     if (global_finger != NULL && global_finger < _heap_end) {
2991       // The global finger always points to a heap region boundary. We
2992       // use heap_region_containing_raw() to get the containing region
2993       // given that the global finger could be pointing to a free region
2994       // which subsequently becomes a continues humongous region. If
2995       // that happens, heap_region_containing() would return the
2996       // corresponding starts humongous region, whose bottom() would no
2997       // longer match the finger, so the check below would not hold.
2998       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2999       guarantee(global_finger == global_hr->bottom(),
3000                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3001                         global_finger, HR_FORMAT_PARAMS(global_hr)));
3002     }
3003 
3004     // Verify the task fingers
3005     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3006     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3007       CMTask* task = _tasks[i];
3008       HeapWord* task_finger = task->finger();
3009       if (task_finger != NULL && task_finger < _heap_end) {
3010         // See above note on the global finger verification.
3011         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3012         guarantee(task_finger == task_hr->bottom() ||
3013                   !task_hr->in_collection_set(),
3014                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3015                           task_finger, HR_FORMAT_PARAMS(task_hr)));
3016       }
3017     }
3018   }
3019 }
3020 #endif // PRODUCT
3021 
3022 // Aggregate the counting data that was constructed concurrently
3023 // with marking.
3024 class AggregateCountDataHRClosure: public HeapRegionClosure {
3025   G1CollectedHeap* _g1h;
3026   ConcurrentMark* _cm;
3027   CardTableModRefBS* _ct_bs;
3028   BitMap* _cm_card_bm;
3029   uint _max_worker_id;
3030 
3031  public:
3032   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3033                               BitMap* cm_card_bm,
3034                               uint max_worker_id) :
3035     _g1h(g1h), _cm(g1h->concurrent_mark()),
3036     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3037     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3038 
3039   bool doHeapRegion(HeapRegion* hr) {
3040     if (hr->continuesHumongous()) {
3041       // We will ignore these here and process them when their
3042       // associated "starts humongous" region is processed.
3043       // Note that we cannot rely on their associated
3044       // "starts humongous" region to have its bit set to 1
3045       // since, due to the region chunking in the parallel region
3046       // iteration, a "continues humongous" region might be visited
3047       // before its associated "starts humongous" region.
3048       return false;
3049     }
3050 
3051     HeapWord* start = hr->bottom();
3052     HeapWord* limit = hr->next_top_at_mark_start();
3053     HeapWord* end = hr->end();
3054 
3055     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3056            err_msg("Preconditions not met - "
3057                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3058                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
3059                    start, limit, hr->top(), hr->end()));
3060 
3061     assert(hr->next_marked_bytes() == 0, "Precondition");
3062 
3063     if (start == limit) {
3064       // NTAMS of this region has not been set so nothing to do.
3065       return false;
3066     }
3067 
3068     // 'start' should be in the heap.
3069     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3070     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3071     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3072 
3073     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3074     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3075     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3076 
3077     // If ntams is not card aligned then we bump the card bitmap index
3078     // for limit so that we get all the cards spanned by
3079     // the object ending at ntams.
3080     // Note: if this is the last region in the heap then ntams
3081     // could actually be just beyond the end of the heap;
3082     // limit_idx will then correspond to a (non-existent) card
3083     // that is also outside the heap.
3084     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3085       limit_idx += 1;
3086     }
3087 
3088     assert(limit_idx <= end_idx, "or else use atomics");
3089 
3090     // Aggregate the "stripe" in the count data associated with hr.
3091     uint hrs_index = hr->hrs_index();
3092     size_t marked_bytes = 0;
3093 
3094     for (uint i = 0; i < _max_worker_id; i += 1) {
3095       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3096       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3097 
3098       // Fetch the marked_bytes in this region for task i and
3099       // add it to the running total for this region.
3100       marked_bytes += marked_bytes_array[hrs_index];
3101 
3102       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3103       // into the global card bitmap.
3104       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3105 
3106       while (scan_idx < limit_idx) {
3107         assert(task_card_bm->at(scan_idx) == true, "should be");
3108         _cm_card_bm->set_bit(scan_idx);
3109         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3110 
3111         // BitMap::get_next_one_offset() can handle the case when
3112         // its left_offset parameter is greater than its right_offset
3113         // parameter. It does, however, have an early exit if
3114         // left_offset == right_offset. So let's limit the value
3115         // passed in for left offset here.
3116         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3117         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3118       }
3119     }
3120 
3121     // Update the marked bytes for this region.
3122     hr->add_to_marked_bytes(marked_bytes);
3123 
3124     // Next heap region
3125     return false;
3126   }
3127 };
3128 
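// Gang task that applies AggregateCountDataHRClosure to the heap
// regions: chunked across the active workers when parallel GC threads
// are in use, or over the whole heap by a single thread otherwise.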
3129 class G1AggregateCountDataTask: public AbstractGangTask {
3130 protected:
3131   G1CollectedHeap* _g1h;
3132   ConcurrentMark* _cm;
3133   BitMap* _cm_card_bm;
3134   uint _max_worker_id;
3135   int _active_workers;
3136 
3137 public:
3138   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3139                            ConcurrentMark* cm,
3140                            BitMap* cm_card_bm,
3141                            uint max_worker_id,
3142                            int n_workers) :
3143     AbstractGangTask("Count Aggregation"),
3144     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3145     _max_worker_id(max_worker_id),
3146     _active_workers(n_workers) { }
3147 
3148   void work(uint worker_id) {
3149     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3150 
3151     if (G1CollectedHeap::use_parallel_gc_threads()) {
3152       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3153                                             _active_workers,
3154                                             HeapRegion::AggregateCountClaimValue);
3155     } else {
3156       _g1h->heap_region_iterate(&cl);
3157     }
3158   }
3159 };
3160 
3161 
3162 void ConcurrentMark::aggregate_count_data() {
3163   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3164                         _g1h->workers()->active_workers() :
3165                         1);
3166 
3167   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3168                                            _max_worker_id, n_workers);
3169 
3170   if (G1CollectedHeap::use_parallel_gc_threads()) {
3171     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3172            "sanity check");
3173     _g1h->set_par_threads(n_workers);
3174     _g1h->workers()->run_task(&g1_par_agg_task);
3175     _g1h->set_par_threads(0);
3176 
3177     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3178            "sanity check");
3179     _g1h->reset_heap_region_claim_values();
3180   } else {
3181     g1_par_agg_task.work(0);
3182   }
3183 }
3184 
3185 // Clear the per-worker arrays used to store the per-region counting data
3186 void ConcurrentMark::clear_all_count_data() {
3187   // Clear the global card bitmap - it will be filled during
3188   // liveness count aggregation (during remark) and the
3189   // final counting task.
3190   _card_bm.clear();
3191 
3192   // Clear the global region bitmap - it will be filled as part
3193   // of the final counting task.
3194   _region_bm.clear();
3195 
3196   uint max_regions = _g1h->max_regions();
3197   assert(_max_worker_id > 0, "uninitialized");
3198 
3199   for (uint i = 0; i < _max_worker_id; i += 1) {
3200     BitMap* task_card_bm = count_card_bitmap_for(i);
3201     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3202 
3203     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3204     assert(marked_bytes_array != NULL, "uninitialized");
3205 
3206     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3207     task_card_bm->clear();
3208   }
3209 }
3210 
3211 void ConcurrentMark::print_stats() {
3212   if (verbose_stats()) {
3213     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3214     for (size_t i = 0; i < _active_tasks; ++i) {
3215       _tasks[i]->print_stats();
3216       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3217     }
3218   }
3219 }
3220 
3221 // abandon current marking iteration due to a Full GC
3222 void ConcurrentMark::abort() {
3223   // Clear all marks to force marking thread to do nothing
3224   _nextMarkBitMap->clearAll();
3225   // Clear the liveness counting data
3226   clear_all_count_data();
3227   // Empty mark stack
3228   reset_marking_state();
3229   for (uint i = 0; i < _max_worker_id; ++i) {
3230     _tasks[i]->clear_region_fields();
3231   }
3232   _has_aborted = true;
3233 
3234   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3235   satb_mq_set.abandon_partial_marking();
3236   // This can be called either during or outside marking, we'll read
3237   // the expected_active value from the SATB queue set.
3238   satb_mq_set.set_active_all_threads(
3239                                  false, /* new active value */
3240                                  satb_mq_set.is_active() /* expected_active */);
3241 }
3242 
3243 static void print_ms_time_info(const char* prefix, const char* name,
3244                                NumberSeq& ns) {
3245   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3246                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3247   if (ns.num() > 0) {
3248     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3249                            prefix, ns.sd(), ns.maximum());
3250   }
3251 }
3252 
3253 void ConcurrentMark::print_summary_info() {
3254   gclog_or_tty->print_cr(" Concurrent marking:");
3255   print_ms_time_info("  ", "init marks", _init_times);
3256   print_ms_time_info("  ", "remarks", _remark_times);
3257   {
3258     print_ms_time_info("     ", "final marks", _remark_mark_times);
3259     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3260 
3261   }
3262   print_ms_time_info("  ", "cleanups", _cleanup_times);
3263   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3264                          _total_counting_time,
3265                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3266                           (double)_cleanup_times.num()
3267                          : 0.0));
3268   if (G1ScrubRemSets) {
3269     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3270                            _total_rs_scrub_time,
3271                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3272                             (double)_cleanup_times.num()
3273                            : 0.0));
3274   }
3275   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3276                          (_init_times.sum() + _remark_times.sum() +
3277                           _cleanup_times.sum())/1000.0);
3278   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3279                 "(%8.2f s marking).",
3280                 cmThread()->vtime_accum(),
3281                 cmThread()->vtime_mark_accum());
3282 }
3283 
3284 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3285   if (use_parallel_marking_threads()) {
3286     _parallel_workers->print_worker_threads_on(st);
3287   }
3288 }
3289 
3290 // We take a break if someone is trying to stop the world.
3291 bool ConcurrentMark::do_yield_check(uint worker_id) {
3292   if (should_yield()) {
3293     if (worker_id == 0) {
3294       _g1h->g1_policy()->record_concurrent_pause();
3295     }
3296     cmThread()->yield();
3297     return true;
3298   } else {
3299     return false;
3300   }
3301 }
3302 
3303 bool ConcurrentMark::should_yield() {
3304   return cmThread()->should_yield();
3305 }
3306 
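// Returns whether the card spanning the address p is marked in the
// global card bitmap that holds the liveness counting data.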
3307 bool ConcurrentMark::containing_card_is_marked(void* p) {
3308   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3309   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3310 }
3311 
3312 bool ConcurrentMark::containing_cards_are_marked(void* start,
3313                                                  void* last) {
3314   return containing_card_is_marked(start) &&
3315          containing_card_is_marked(last);
3316 }
3317 
3318 #ifndef PRODUCT
3319 // for debugging purposes
3320 void ConcurrentMark::print_finger() {
3321   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3322                          _heap_start, _heap_end, _finger);
3323   for (uint i = 0; i < _max_worker_id; ++i) {
3324     gclog_or_tty->print("   %u: "PTR_FORMAT, i, _tasks[i]->finger());
3325   }
3326   gclog_or_tty->print_cr("");
3327 }
3328 #endif
3329 
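// Scans the fields of an object that is already marked on the next
// bitmap, updates the words-scanned counter and then calls
// check_limits() so that the regular clock fires once a scanning limit
// has been reached.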
3330 void CMTask::scan_object(oop obj) {
3331   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3332 
3333   if (_cm->verbose_high()) {
3334     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3335                            _worker_id, (void*) obj);
3336   }
3337 
3338   size_t obj_size = obj->size();
3339   _words_scanned += obj_size;
3340 
3341   obj->oop_iterate(_cm_oop_closure);
3342   statsOnly( ++_objs_scanned );
3343   check_limits();
3344 }
3345 
3346 // Closure for iteration over bitmaps
3347 class CMBitMapClosure : public BitMapClosure {
3348 private:
3349   // the bitmap that is being iterated over
3350   CMBitMap*                   _nextMarkBitMap;
3351   ConcurrentMark*             _cm;
3352   CMTask*                     _task;
3353 
3354 public:
3355   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3356     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3357 
3358   bool do_bit(size_t offset) {
3359     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3360     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3361     assert( addr < _cm->finger(), "invariant");
3362 
3363     statsOnly( _task->increase_objs_found_on_bitmap() );
3364     assert(addr >= _task->finger(), "invariant");
3365 
3366     // We move that task's local finger along.
3367     _task->move_finger_to(addr);
3368 
3369     _task->scan_object(oop(addr));
3370     // we only partially drain the local queue and global stack
3371     _task->drain_local_queue(true);
3372     _task->drain_global_stack(true);
3373 
3374     // if the has_aborted flag has been raised, we need to bail out of
3375     // the iteration
3376     return !_task->has_aborted();
3377   }
3378 };
3379 
3380 // Closure for iterating over objects, currently only used for
3381 // processing SATB buffers.
3382 class CMObjectClosure : public ObjectClosure {
3383 private:
3384   CMTask* _task;
3385 
3386 public:
3387   void do_object(oop obj) {
3388     _task->deal_with_reference(obj);
3389   }
3390 
3391   CMObjectClosure(CMTask* task) : _task(task) { }
3392 };
3393 
3394 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3395                                ConcurrentMark* cm,
3396                                CMTask* task)
3397   : _g1h(g1h), _cm(cm), _task(task) {
3398   assert(_ref_processor == NULL, "should be initialized to NULL");
3399 
3400   if (G1UseConcMarkReferenceProcessing) {
3401     _ref_processor = g1h->ref_processor_cm();
3402     assert(_ref_processor != NULL, "should not be NULL");
3403   }
3404 }
3405 
3406 void CMTask::setup_for_region(HeapRegion* hr) {
3407   // Separated the asserts so that we know which one fires.
3408   assert(hr != NULL,
3409         "claim_region() should have filtered out NULL regions");
3410   assert(!hr->continuesHumongous(),
3411         "claim_region() should have filtered out continues humongous regions");
3412 
3413   if (_cm->verbose_low()) {
3414     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3415                            _worker_id, hr);
3416   }
3417 
3418   _curr_region  = hr;
3419   _finger       = hr->bottom();
3420   update_region_limit();
3421 }
3422 
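// Re-reads the NTAMS of the current region and updates _region_limit
// (and, if necessary, _finger) so that the upcoming bitmap iteration
// only covers the part of the region that still needs to be scanned.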
3423 void CMTask::update_region_limit() {
3424   HeapRegion* hr            = _curr_region;
3425   HeapWord* bottom          = hr->bottom();
3426   HeapWord* limit           = hr->next_top_at_mark_start();
3427 
3428   if (limit == bottom) {
3429     if (_cm->verbose_low()) {
3430       gclog_or_tty->print_cr("[%u] found an empty region "
3431                              "["PTR_FORMAT", "PTR_FORMAT")",
3432                              _worker_id, bottom, limit);
3433     }
3434     // The region was collected underneath our feet.
3435     // We set the finger to bottom to ensure that the bitmap
3436     // iteration that will follow this will not do anything.
3437     // (this is not a condition that holds when we set the region up,
3438     // as the region is not supposed to be empty in the first place)
3439     _finger = bottom;
3440   } else if (limit >= _region_limit) {
3441     assert(limit >= _finger, "peace of mind");
3442   } else {
3443     assert(limit < _region_limit, "only way to get here");
3444     // This can happen under some pretty unusual circumstances.  An
3445     // evacuation pause empties the region underneath our feet (NTAMS
3446     // at bottom). We then do some allocation in the region (NTAMS
3447     // stays at bottom), followed by the region being used as a GC
3448     // alloc region (NTAMS will move to top() and the objects
3449     // originally below it will be grayed). All objects now marked in
3450     // the region are explicitly grayed, if below the global finger,
3451     // and in fact we do not need to scan anything else. So, we simply
3452     // set _finger to be limit to ensure that the bitmap iteration
3453     // doesn't do anything.
3454     _finger = limit;
3455   }
3456 
3457   _region_limit = limit;
3458 }
3459 
3460 void CMTask::giveup_current_region() {
3461   assert(_curr_region != NULL, "invariant");
3462   if (_cm->verbose_low()) {
3463     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3464                            _worker_id, _curr_region);
3465   }
3466   clear_region_fields();
3467 }
3468 
3469 void CMTask::clear_region_fields() {
3470   // Values for these three fields that indicate that we're not
3471   // holding on to a region.
3472   _curr_region   = NULL;
3473   _finger        = NULL;
3474   _region_limit  = NULL;
3475 }
3476 
3477 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3478   if (cm_oop_closure == NULL) {
3479     assert(_cm_oop_closure != NULL, "invariant");
3480   } else {
3481     assert(_cm_oop_closure == NULL, "invariant");
3482   }
3483   _cm_oop_closure = cm_oop_closure;
3484 }
3485 
3486 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3487   guarantee(nextMarkBitMap != NULL, "invariant");
3488 
3489   if (_cm->verbose_low()) {
3490     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3491   }
3492 
3493   _nextMarkBitMap                = nextMarkBitMap;
3494   clear_region_fields();
3495 
3496   _calls                         = 0;
3497   _elapsed_time_ms               = 0.0;
3498   _termination_time_ms           = 0.0;
3499   _termination_start_time_ms     = 0.0;
3500 
3501 #if _MARKING_STATS_
3502   _local_pushes                  = 0;
3503   _local_pops                    = 0;
3504   _local_max_size                = 0;
3505   _objs_scanned                  = 0;
3506   _global_pushes                 = 0;
3507   _global_pops                   = 0;
3508   _global_max_size               = 0;
3509   _global_transfers_to           = 0;
3510   _global_transfers_from         = 0;
3511   _regions_claimed               = 0;
3512   _objs_found_on_bitmap          = 0;
3513   _satb_buffers_processed        = 0;
3514   _steal_attempts                = 0;
3515   _steals                        = 0;
3516   _aborted                       = 0;
3517   _aborted_overflow              = 0;
3518   _aborted_cm_aborted            = 0;
3519   _aborted_yield                 = 0;
3520   _aborted_timed_out             = 0;
3521   _aborted_satb                  = 0;
3522   _aborted_termination           = 0;
3523 #endif // _MARKING_STATS_
3524 }
3525 
3526 bool CMTask::should_exit_termination() {
3527   regular_clock_call();
3528   // This is called when we are in the termination protocol. We should
3529   // quit if, for some reason, this task wants to abort or the global
3530   // stack is not empty (this means that we can get work from it).
3531   return !_cm->mark_stack_empty() || has_aborted();
3532 }
3533 
3534 void CMTask::reached_limit() {
3535   assert(_words_scanned >= _words_scanned_limit ||
3536          _refs_reached >= _refs_reached_limit,
3537          "shouldn't have been called otherwise");
3538   regular_clock_call();
3539 }
3540 
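// The "regular clock" of a marking step: called on a work-based
// schedule (and from a few other strategic places), it checks the
// abort conditions (overflow, marking aborted by a Full GC, yield
// request, expired time quota, pending SATB buffers) and raises the
// has_aborted flag when one of them holds.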
3541 void CMTask::regular_clock_call() {
3542   if (has_aborted()) return;
3543 
3544   // First, we need to recalculate the words scanned and refs reached
3545   // limits for the next clock call.
3546   recalculate_limits();
3547 
3548   // During the regular clock call we do the following
3549 
3550   // (1) If an overflow has been flagged, then we abort.
3551   if (_cm->has_overflown()) {
3552     set_has_aborted();
3553     return;
3554   }
3555 
3556   // If we are not concurrent (i.e. we're doing remark) we don't need
3557   // to check anything else. The other steps are only needed during
3558   // the concurrent marking phase.
3559   if (!concurrent()) return;
3560 
3561   // (2) If marking has been aborted for Full GC, then we also abort.
3562   if (_cm->has_aborted()) {
3563     set_has_aborted();
3564     statsOnly( ++_aborted_cm_aborted );
3565     return;
3566   }
3567 
3568   double curr_time_ms = os::elapsedVTime() * 1000.0;
3569 
3570   // (3) If marking stats are enabled, then we update the clock statistics.
3571 #if _MARKING_STATS_
3572   if (_words_scanned >= _words_scanned_limit) {
3573     ++_clock_due_to_scanning;
3574   }
3575   if (_refs_reached >= _refs_reached_limit) {
3576     ++_clock_due_to_marking;
3577   }
3578 
3579   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3580   _interval_start_time_ms = curr_time_ms;
3581   _all_clock_intervals_ms.add(last_interval_ms);
3582 
3583   if (_cm->verbose_medium()) {
3584       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3585                         "scanned = %d%s, refs reached = %d%s",
3586                         _worker_id, last_interval_ms,
3587                         _words_scanned,
3588                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3589                         _refs_reached,
3590                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3591   }
3592 #endif // _MARKING_STATS_
3593 
3594   // (4) We check whether we should yield. If we have to, then we abort.
3595   if (_cm->should_yield()) {
3596     // We should yield. To do this we abort the task. The caller is
3597     // responsible for yielding.
3598     set_has_aborted();
3599     statsOnly( ++_aborted_yield );
3600     return;
3601   }
3602 
3603   // (5) We check whether we've reached our time quota. If we have,
3604   // then we abort.
3605   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3606   if (elapsed_time_ms > _time_target_ms) {
3607     set_has_aborted();
3608     _has_timed_out = true;
3609     statsOnly( ++_aborted_timed_out );
3610     return;
3611   }
3612 
3613   // (6) Finally, we check whether there are enough completed SATB
3614   // buffers available for processing. If there are, we abort.
3615   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3616   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3617     if (_cm->verbose_low()) {
3618       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3619                              _worker_id);
3620     }
3621     // we do need to process SATB buffers, so we'll abort and restart
3622     // the marking task to do so
3623     set_has_aborted();
3624     statsOnly( ++_aborted_satb );
3625     return;
3626   }
3627 }
3628 
3629 void CMTask::recalculate_limits() {
3630   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3631   _words_scanned_limit      = _real_words_scanned_limit;
3632 
3633   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3634   _refs_reached_limit       = _real_refs_reached_limit;
3635 }
3636 
3637 void CMTask::decrease_limits() {
3638   // This is called when we believe that we're going to do an infrequent
3639   // operation which will increase the per-byte scanning cost (i.e. move
3640   // entries to/from the global stack). It basically tries to decrease the
3641   // scanning limit so that the clock is called earlier.
3642 
3643   if (_cm->verbose_medium()) {
3644     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3645   }
3646 
3647   _words_scanned_limit = _real_words_scanned_limit -
3648     3 * words_scanned_period / 4;
3649   _refs_reached_limit  = _real_refs_reached_limit -
3650     3 * refs_reached_period / 4;
3651 }
3652 
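// Moves up to global_stack_transfer_size entries from the task's local
// queue to the global mark stack, aborting the task if the push to the
// global stack fails because it overflowed.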
3653 void CMTask::move_entries_to_global_stack() {
3654   // local array where we'll store the entries that will be popped
3655   // from the local queue
3656   oop buffer[global_stack_transfer_size];
3657 
3658   int n = 0;
3659   oop obj;
3660   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3661     buffer[n] = obj;
3662     ++n;
3663   }
3664 
3665   if (n > 0) {
3666     // we popped at least one entry from the local queue
3667 
3668     statsOnly( ++_global_transfers_to; _local_pops += n );
3669 
3670     if (!_cm->mark_stack_push(buffer, n)) {
3671       if (_cm->verbose_low()) {
3672         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3673                                _worker_id);
3674       }
3675       set_has_aborted();
3676     } else {
3677       // the transfer was successful
3678 
3679       if (_cm->verbose_medium()) {
3680         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3681                                _worker_id, n);
3682       }
3683       statsOnly( int tmp_size = _cm->mark_stack_size();
3684                  if (tmp_size > _global_max_size) {
3685                    _global_max_size = tmp_size;
3686                  }
3687                  _global_pushes += n );
3688     }
3689   }
3690 
3691   // this operation was quite expensive, so decrease the limits
3692   decrease_limits();
3693 }
3694 
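// Pops up to global_stack_transfer_size entries from the global mark
// stack and pushes them onto the task's local queue.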
3695 void CMTask::get_entries_from_global_stack() {
3696   // local array where we'll store the entries that will be popped
3697   // from the global stack.
3698   oop buffer[global_stack_transfer_size];
3699   int n;
3700   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3701   assert(n <= global_stack_transfer_size,
3702          "we should not pop more than the given limit");
3703   if (n > 0) {
3704     // yes, we did actually pop at least one entry
3705 
3706     statsOnly( ++_global_transfers_from; _global_pops += n );
3707     if (_cm->verbose_medium()) {
3708       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3709                              _worker_id, n);
3710     }
3711     for (int i = 0; i < n; ++i) {
3712       bool success = _task_queue->push(buffer[i]);
3713       // We only call this when the local queue is empty or under a
3714       // given target limit. So, we do not expect this push to fail.
3715       assert(success, "invariant");
3716     }
3717 
3718     statsOnly( int tmp_size = _task_queue->size();
3719                if (tmp_size > _local_max_size) {
3720                  _local_max_size = tmp_size;
3721                }
3722                _local_pushes += n );
3723   }
3724 
3725   // this operation was quite expensive, so decrease the limits
3726   decrease_limits();
3727 }
3728 
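// Drains the task's local queue, either down to a target size (so that
// other tasks can still steal entries) or completely, scanning each
// object that is popped.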
3729 void CMTask::drain_local_queue(bool partially) {
3730   if (has_aborted()) return;
3731 
3732   // Decide what the target size is, depending on whether we're going to
3733   // drain it partially (so that other tasks can steal if they run out
3734   // of things to do) or totally (at the very end).
3735   size_t target_size;
3736   if (partially) {
3737     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3738   } else {
3739     target_size = 0;
3740   }
3741 
3742   if (_task_queue->size() > target_size) {
3743     if (_cm->verbose_high()) {
3744       gclog_or_tty->print_cr("[%u] draining local queue, target size = "SIZE_FORMAT,
3745                              _worker_id, target_size);
3746     }
3747 
3748     oop obj;
3749     bool ret = _task_queue->pop_local(obj);
3750     while (ret) {
3751       statsOnly( ++_local_pops );
3752 
3753       if (_cm->verbose_high()) {
3754         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3755                                (void*) obj);
3756       }
3757 
3758       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3759       assert(!_g1h->is_on_master_free_list(
3760                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3761 
3762       scan_object(obj);
3763 
3764       if (_task_queue->size() <= target_size || has_aborted()) {
3765         ret = false;
3766       } else {
3767         ret = _task_queue->pop_local(obj);
3768       }
3769     }
3770 
3771     if (_cm->verbose_high()) {
3772       gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3773                              _worker_id, _task_queue->size());
3774     }
3775   }
3776 }
3777 
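// Drains the global mark stack, either down to a target size or
// completely, by repeatedly transferring a chunk of entries to the
// local queue and then draining the local queue.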
3778 void CMTask::drain_global_stack(bool partially) {
3779   if (has_aborted()) return;
3780 
3781   // We have a policy to drain the local queue before we attempt to
3782   // drain the global stack.
3783   assert(partially || _task_queue->size() == 0, "invariant");
3784 
3785   // Decide what the target size is, depending on whether we're going to
3786   // drain it partially (so that other tasks can steal if they run out
3787   // of things to do) or totally (at the very end).  Notice that,
3788   // because we move entries from the global stack in chunks or
3789   // because another task might be doing the same, we might in fact
3790   // drop below the target. But, this is not a problem.
3791   size_t target_size;
3792   if (partially) {
3793     target_size = _cm->partial_mark_stack_size_target();
3794   } else {
3795     target_size = 0;
3796   }
3797 
3798   if (_cm->mark_stack_size() > target_size) {
3799     if (_cm->verbose_low()) {
3800       gclog_or_tty->print_cr("[%u] draining global stack, target size "SIZE_FORMAT,
3801                              _worker_id, target_size);
3802     }
3803 
3804     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3805       get_entries_from_global_stack();
3806       drain_local_queue(partially);
3807     }
3808 
3809     if (_cm->verbose_low()) {
3810       gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3811                              _worker_id, _cm->mark_stack_size());
3812     }
3813   }
3814 }
3815 
3816 // The SATB queue set makes assumptions about whether the par or
3817 // non-par versions of its methods are called. This is why some of the
3818 // code below is replicated. We should really get rid of the
3819 // single-threaded version of the code to simplify things.
3820 void CMTask::drain_satb_buffers() {
3821   if (has_aborted()) return;
3822 
3823   // We set this so that the regular clock knows that we're in the
3824   // middle of draining buffers and doesn't set the abort flag when it
3825   // notices that SATB buffers are available for draining. It'd be
3826   // very counterproductive if it did that. :-)
3827   _draining_satb_buffers = true;
3828 
3829   CMObjectClosure oc(this);
3830   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3831   if (G1CollectedHeap::use_parallel_gc_threads()) {
3832     satb_mq_set.set_par_closure(_worker_id, &oc);
3833   } else {
3834     satb_mq_set.set_closure(&oc);
3835   }
3836 
3837   // This keeps claiming and applying the closure to completed buffers
3838   // until we run out of buffers or we need to abort.
3839   if (G1CollectedHeap::use_parallel_gc_threads()) {
3840     while (!has_aborted() &&
3841            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3842       if (_cm->verbose_medium()) {
3843         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3844       }
3845       statsOnly( ++_satb_buffers_processed );
3846       regular_clock_call();
3847     }
3848   } else {
3849     while (!has_aborted() &&
3850            satb_mq_set.apply_closure_to_completed_buffer()) {
3851       if (_cm->verbose_medium()) {
3852         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3853       }
3854       statsOnly( ++_satb_buffers_processed );
3855       regular_clock_call();
3856     }
3857   }
3858 
3859   if (!concurrent() && !has_aborted()) {
3860     // We should only do this during remark.
3861     if (G1CollectedHeap::use_parallel_gc_threads()) {
3862       satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3863     } else {
3864       satb_mq_set.iterate_closure_all_threads();
3865     }
3866   }
3867 
3868   _draining_satb_buffers = false;
3869 
3870   assert(has_aborted() ||
3871          concurrent() ||
3872          satb_mq_set.completed_buffers_num() == 0, "invariant");
3873 
3874   if (G1CollectedHeap::use_parallel_gc_threads()) {
3875     satb_mq_set.set_par_closure(_worker_id, NULL);
3876   } else {
3877     satb_mq_set.set_closure(NULL);
3878   }
3879 
3880   // again, this was a potentially expensive operation, so decrease the
3881   // limits to get the regular clock call early
3882   decrease_limits();
3883 }
3884 
3885 void CMTask::print_stats() {
3886   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3887                          _worker_id, _calls);
3888   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3889                          _elapsed_time_ms, _termination_time_ms);
3890   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3891                          _step_times_ms.num(), _step_times_ms.avg(),
3892                          _step_times_ms.sd());
3893   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3894                          _step_times_ms.maximum(), _step_times_ms.sum());
3895 
3896 #if _MARKING_STATS_
3897   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3898                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3899                          _all_clock_intervals_ms.sd());
3900   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3901                          _all_clock_intervals_ms.maximum(),
3902                          _all_clock_intervals_ms.sum());
3903   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3904                          _clock_due_to_scanning, _clock_due_to_marking);
3905   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3906                          _objs_scanned, _objs_found_on_bitmap);
3907   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3908                          _local_pushes, _local_pops, _local_max_size);
3909   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3910                          _global_pushes, _global_pops, _global_max_size);
3911   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3912                          _global_transfers_to,_global_transfers_from);
3913   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
3914   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3915   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3916                          _steal_attempts, _steals);
3917   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3918   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3919                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3920   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3921                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3922 #endif // _MARKING_STATS_
3923 }
3924 
3925 /*****************************************************************************
3926 
3927     The do_marking_step(time_target_ms, ...) method is the building
3928     block of the parallel marking framework. It can be called in parallel
3929     with other invocations of do_marking_step() on different tasks
3930     (but only one per task, obviously) and concurrently with the
3931     mutator threads, or during remark, hence it eliminates the need
3932     for two versions of the code. When called during remark, it will
3933     pick up from where the task left off during the concurrent marking
3934     phase. Interestingly, tasks are also claimable during evacuation
3935     pauses, since do_marking_step() ensures that it aborts before
3936     it needs to yield.
3937 
3938     The data structures that it uses to do marking work are the
3939     following:
3940 
3941       (1) Marking Bitmap. If there are gray objects that appear only
3942       on the bitmap (this happens either when dealing with an overflow
3943       or when the initial marking phase has simply marked the roots
3944       and didn't push them on the stack), then tasks claim heap
3945       regions whose bitmap they then scan to find gray objects. A
3946       global finger indicates where the end of the last claimed region
3947       is. A local finger indicates how far into the region a task has
3948       scanned. The two fingers are used to determine how to gray an
3949       object (i.e. whether simply marking it is OK, as it will be
3950       visited by a task in the future, or whether it needs to be also
3951       pushed on a stack).
3952 
3953       (2) Local Queue. The local queue of the task which is accessed
3954       reasonably efficiently by the task. Other tasks can steal from
3955       it when they run out of work. Throughout the marking phase, a
3956       task attempts to keep its local queue short but not totally
3957       empty, so that entries are available for stealing by other
3958       tasks. Only when there is no more work will a task totally
3959       drain its local queue.
3960 
3961       (3) Global Mark Stack. This handles local queue overflow. During
3962       marking, only batches of entries are moved between it and the
3963       local queues, as access to it requires a mutex and finer-grained
3964       interaction with it might cause contention. If it
3965       overflows, then the marking phase should restart and iterate
3966       over the bitmap to identify gray objects. Throughout the marking
3967       phase, tasks attempt to keep the global mark stack at a small
3968       length but not totally empty, so that entries are available for
3969       popping by other tasks. Only when there is no more work will
3970       tasks totally drain the global mark stack.
3971 
3972       (4) SATB Buffer Queue. This is where completed SATB buffers are
3973       made available. Buffers are regularly removed from this queue
3974       and scanned for roots, so that the queue doesn't get too
3975       long. During remark, all completed buffers are processed, as
3976       well as the filled-in parts of any uncompleted buffers.
3977 
3978     The do_marking_step() method tries to abort when the time target
3979     has been reached. There are a few other cases when the
3980     do_marking_step() method also aborts:
3981 
3982       (1) When the marking phase has been aborted (after a Full GC).
3983 
3984       (2) When a global overflow (on the global stack) has been
3985       triggered. Before the task aborts, it will actually sync up with
3986       the other tasks to ensure that all the marking data structures
3987       (local queues, stacks, fingers etc.)  are re-initialized so that
3988       when do_marking_step() completes, the marking phase can
3989       immediately restart.
3990 
3991       (3) When enough completed SATB buffers are available. The
3992       do_marking_step() method only tries to drain SATB buffers right
3993       at the beginning. So, if enough buffers are available, the
3994       marking step aborts and the SATB buffers are processed at
3995       the beginning of the next invocation.
3996 
3997       (4) To yield. When we have to yield, we abort and yield
3998       right at the end of do_marking_step(). This saves us a lot
3999       of hassle as, by yielding, we might allow a Full GC. If this
4000       happens then objects will be compacted underneath our feet, the
4001       heap might shrink, etc. We save checking for this by just
4002       aborting and doing the yield right at the end.
4003 
4004     From the above it follows that the do_marking_step() method should
4005     be called in a loop (or, otherwise, regularly) until it completes.
4006 
4007     If a marking step completes without its has_aborted() flag being
4008     true, it means it has completed the current marking phase (and
4009     also all other marking tasks have done so and have all synced up).
4010 
4011     A method called regular_clock_call() is invoked "regularly" (in
4012     sub ms intervals) throughout marking. It is this clock method that
4013     checks all the abort conditions which were mentioned above and
4014     decides when the task should abort. A work-based scheme is used to
4015     trigger this clock method: when the number of object words the
4016     marking phase has scanned or the number of references the marking
4017     phase has visited reach a given limit. Additional invocations of
4018     the clock method have been planted in a few other strategic places
4019     too. The initial reason for the clock method was to avoid calling
4020     vtime too regularly, as it is quite expensive. So, once it was in
4021     place, it was natural to piggy-back all the other conditions on it
4022     too and not constantly check them throughout the code.
4023 
4024     If do_stealing is true then do_marking_step will attempt to steal
4025     work from the other CMTasks. It only makes sense to enable
4026     stealing when being called by multiple threads.
4027 
4028     If do_termination is true then do_marking_step will enter its
4029     termination protocol.
4030 
4031     The value of is_serial must be true when do_marking_step is being
4032     called serially (i.e. by the VMThread) and do_marking_step should
4033     skip any synchronization in the termination and overflow code.
4034     Examples include the serial remark code and the serial reference
4035     processing closures.
4036 
4037     The value of is_serial must be false when do_marking_step is
4038     being called by any of the worker threads in a work gang.
4039     Examples include the concurrent marking code (CMMarkingTask),
4040     the MT remark code, and the MT reference processing closures.
4041 
4042  *****************************************************************************/
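// An illustrative sketch (not code from this file) of how a caller might
// drive the abort/retry protocol described above; the names task, cm and
// target_ms are placeholders:
//
//   do {
//     task->do_marking_step(target_ms,
//                           true  /* do_stealing */,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//     // ... yield and/or handle an overflow restart here ...
//   } while (task->has_aborted() && !cm->has_aborted());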
4043 
4044 void CMTask::do_marking_step(double time_target_ms,
4045                              bool do_stealing,
4046                              bool do_termination,
4047                              bool is_serial) {
4048   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4049   assert(concurrent() == _cm->concurrent(), "they should be the same");
4050 
4051   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4052   assert(_task_queues != NULL, "invariant");
4053   assert(_task_queue != NULL, "invariant");
4054   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4055 
4056   assert(!_claimed,
4057          "only one thread should claim this task at any one time");
4058 
4059   // OK, this doesn't safeguard against all possible scenarios, as it is
4060   // possible for two threads to set the _claimed flag at the same
4061   // time. But it is only for debugging purposes anyway and it will
4062   // catch most problems.
4063   _claimed = true;
4064 
4065   _start_time_ms = os::elapsedVTime() * 1000.0;
4066   statsOnly( _interval_start_time_ms = _start_time_ms );
4067 
4068   double diff_prediction_ms =
4069     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4070   _time_target_ms = time_target_ms - diff_prediction_ms;
4071 
4072   // set up the variables that are used in the work-based scheme to
4073   // call the regular clock method
4074   _words_scanned = 0;
4075   _refs_reached  = 0;
4076   recalculate_limits();
4077 
4078   // clear all flags
4079   clear_has_aborted();
4080   _has_timed_out = false;
4081   _draining_satb_buffers = false;
4082 
4083   ++_calls;
4084 
4085   if (_cm->verbose_low()) {
4086     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4087                            "target = %1.2lfms >>>>>>>>>>",
4088                            _worker_id, _calls, _time_target_ms);
4089   }
4090 
4091   // Set up the bitmap and oop closures. Anything that uses them is
4092   // eventually called from this method, so it is OK to allocate these
4093   // on the stack.
4094   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4095   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4096   set_cm_oop_closure(&cm_oop_closure);
4097 
4098   if (_cm->has_overflown()) {
4099     // This can happen if the mark stack overflows during a GC pause
4100     // and this task, after a yield point, restarts. We have to abort
4101     // as we need to get into the overflow protocol which happens
4102     // right at the end of this task.
4103     set_has_aborted();
4104   }
4105 
4106   // First drain any available SATB buffers. After this, we will not
4107   // look at SATB buffers before the next invocation of this method.
4108   // If enough completed SATB buffers are queued up, the regular clock
4109   // will abort this task so that it restarts.
4110   drain_satb_buffers();
4111   // ...then partially drain the local queue and the global stack
4112   drain_local_queue(true);
4113   drain_global_stack(true);
4114 
4115   do {
4116     if (!has_aborted() && _curr_region != NULL) {
4117       // This means that we're already holding on to a region.
4118       assert(_finger != NULL, "if region is not NULL, then the finger "
4119              "should not be NULL either");
4120 
4121       // We might have restarted this task after an evacuation pause
4122       // which might have evacuated the region we're holding on to
4123       // underneath our feet. Let's read its limit again to make sure
4124       // that we do not iterate over a region of the heap that
4125       // contains garbage (update_region_limit() will also move
4126       // _finger to the start of the region if it is found empty).
4127       update_region_limit();
4128       // We will start from _finger not from the start of the region,
4129       // as we might be restarting this task after aborting half-way
4130       // through scanning this region. In this case, _finger points to
4131       // the address where we last found a marked object. If this is a
4132       // fresh region, _finger points to start().
4133       MemRegion mr = MemRegion(_finger, _region_limit);
4134 
4135       if (_cm->verbose_low()) {
4136         gclog_or_tty->print_cr("[%u] we're scanning part "
4137                                "["PTR_FORMAT", "PTR_FORMAT") "
4138                                "of region "HR_FORMAT,
4139                                _worker_id, _finger, _region_limit,
4140                                HR_FORMAT_PARAMS(_curr_region));
4141       }
4142 
4143       assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4144              "humongous regions should go around loop once only");
4145 
4146       // Some special cases:
4147       // If the memory region is empty, we can just give up the region.
4148       // If the current region is humongous then we only need to check
4149       // the bitmap for the bit associated with the start of the object,
4150       // scan the object if it's live, and give up the region.
4151       // Otherwise, let's iterate over the bitmap of the part of the region
4152       // that is left.
4153       // If the iteration is successful, give up the region.
4154       if (mr.is_empty()) {
4155         giveup_current_region();
4156         regular_clock_call();
4157       } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4158         if (_nextMarkBitMap->isMarked(mr.start())) {
4159           // The object is marked - apply the closure
4160           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4161           bitmap_closure.do_bit(offset);
4162         }
4163         // Even if this task aborted while scanning the humongous object
4164         // we can (and should) give up the current region.
4165         giveup_current_region();
4166         regular_clock_call();
4167       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4168         giveup_current_region();
4169         regular_clock_call();
4170       } else {
4171         assert(has_aborted(), "currently the only way to do so");
4172         // The only way to abort the bitmap iteration is to return
4173         // false from the do_bit() method. However, inside the
4174         // do_bit() method we move the _finger to point to the
4175         // object currently being looked at. So, if we bail out, we
4176         // have definitely set _finger to something non-null.
4177         assert(_finger != NULL, "invariant");
4178 
4179         // Region iteration was actually aborted. So now _finger
4180         // points to the address of the object we last scanned. If we
4181         // leave it there, when we restart this task, we will rescan
4182         // the object. It is easy to avoid this. We move the finger by
4183         // enough to point to the next possible object header (the
4184         // bitmap knows by how much we need to move it as it knows its
4185         // granularity).
4186         assert(_finger < _region_limit, "invariant");
4187         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4188         // Check if bitmap iteration was aborted while scanning the last object
4189         if (new_finger >= _region_limit) {
4190           giveup_current_region();
4191         } else {
4192           move_finger_to(new_finger);
4193         }
4194       }
4195     }
4196     // At this point we have either completed iterating over the
4197     // region we were holding on to, or we have aborted.
4198 
4199     // We then partially drain the local queue and the global stack.
4200     // (Do we really need this?)
4201     drain_local_queue(true);
4202     drain_global_stack(true);
4203 
4204     // Read the note on the claim_region() method on why it might
4205     // return NULL with potentially more regions available for
4206     // claiming and why we have to check out_of_regions() to determine
4207     // whether we're done or not.
4208     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4209       // We are going to try to claim a new region. We should have
4210       // given up on the previous one.
4211       // Separated the asserts so that we know which one fires.
4212       assert(_curr_region  == NULL, "invariant");
4213       assert(_finger       == NULL, "invariant");
4214       assert(_region_limit == NULL, "invariant");
4215       if (_cm->verbose_low()) {
4216         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4217       }
4218       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4219       if (claimed_region != NULL) {
4220         // Yes, we managed to claim one
4221         statsOnly( ++_regions_claimed );
4222 
4223         if (_cm->verbose_low()) {
4224           gclog_or_tty->print_cr("[%u] we successfully claimed "
4225                                  "region "PTR_FORMAT,
4226                                  _worker_id, claimed_region);
4227         }
4228 
4229         setup_for_region(claimed_region);
4230         assert(_curr_region == claimed_region, "invariant");
4231       }
4232       // It is important to call the regular clock here. It might take
4233       // a while to claim a region if, for example, we hit a large
4234       // block of empty regions. So we need to call the regular clock
4235       // method once round the loop to make sure it's called
4236       // frequently enough.
4237       regular_clock_call();
4238     }
4239 
4240     if (!has_aborted() && _curr_region == NULL) {
4241       assert(_cm->out_of_regions(),
4242              "at this point we should be out of regions");
4243     }
4244   } while ( _curr_region != NULL && !has_aborted());
4245 
4246   if (!has_aborted()) {
4247     // We cannot check whether the global stack is empty, since other
4248     // tasks might be pushing objects to it concurrently.
4249     assert(_cm->out_of_regions(),
4250            "at this point we should be out of regions");
4251 
4252     if (_cm->verbose_low()) {
4253       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4254     }
4255 
4256     // Try to reduce the number of available SATB buffers so that
4257     // remark has less work to do.
4258     drain_satb_buffers();
4259   }
4260 
4261   // Since we've done everything else, we can now totally drain the
4262   // local queue and global stack.
4263   drain_local_queue(false);
4264   drain_global_stack(false);
4265 
4266   // Attempt work stealing from other tasks' queues.
4267   if (do_stealing && !has_aborted()) {
4268     // We have not aborted. This means that we have finished all that
4269     // we could. Let's try to do some stealing...
4270 
4271     // We cannot check whether the global stack is empty, since other
4272     // tasks might be pushing objects to it concurrently.
4273     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4274            "only way to reach here");
4275 
4276     if (_cm->verbose_low()) {
4277       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4278     }
4279 
4280     while (!has_aborted()) {
4281       oop obj;
4282       statsOnly( ++_steal_attempts );
4283 
4284       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4285         if (_cm->verbose_medium()) {
4286           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4287                                  _worker_id, (void*) obj);
4288         }
4289 
4290         statsOnly( ++_steals );
4291 
4292         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4293                "any stolen object should be marked");
4294         scan_object(obj);
4295 
4296         // And since we're towards the end, let's totally drain the
4297         // local queue and global stack.
4298         drain_local_queue(false);
4299         drain_global_stack(false);
4300       } else {
4301         break;
4302       }
4303     }
4304   }
4305 
4306   // If we are about to wrap up and go into termination, check if we
4307   // should raise the overflow flag.
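     // (force_overflow() appears to be a testing/stress hook that lets the
     // overflow-and-restart machinery be exercised even when the global mark
     // stack has not actually overflown.)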
4308   if (do_termination && !has_aborted()) {
4309     if (_cm->force_overflow()->should_force()) {
4310       _cm->set_has_overflown();
4311       regular_clock_call();
4312     }
4313   }
4314 
4315   // We still haven't aborted. Now, let's try to get into the
4316   // termination protocol.
4317   if (do_termination && !has_aborted()) {
4318     // We cannot check whether the global stack is empty, since other
4319     // tasks might be concurrently pushing objects on it.
4320     // Separated the asserts so that we know which one fires.
4321     assert(_cm->out_of_regions(), "only way to reach here");
4322     assert(_task_queue->size() == 0, "only way to reach here");
4323 
4324     if (_cm->verbose_low()) {
4325       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4326     }
4327 
4328     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4329 
4330     // The CMTask class also extends the TerminatorTerminator class,
4331     // hence its should_exit_termination() method is also consulted
4332     // when deciding whether to exit the termination protocol or not.
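         // In the serial case there is only one task, so there is nothing to
         // negotiate termination with and we are finished immediately; in the
         // parallel case we rely on the shared parallel task terminator.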
4333     bool finished = (is_serial ||
4334                      _cm->terminator()->offer_termination(this));
4335     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4336     _termination_time_ms +=
4337       termination_end_time_ms - _termination_start_time_ms;
4338 
4339     if (finished) {
4340       // We're all done.
4341 
4342       if (_worker_id == 0) {
4343         // let's allow task 0 to do this
4344         if (concurrent()) {
4345           assert(_cm->concurrent_marking_in_progress(), "invariant");
4346           // we need to set this to false before the next
4347           // safepoint. This way we ensure that the marking phase
4348           // doesn't observe any more heap expansions.
4349           _cm->clear_concurrent_marking_in_progress();
4350         }
4351       }
4352 
4353       // We can now guarantee that the global stack is empty, since
4354       // all other tasks have finished. We separated the guarantees so
4355       // that, if a condition is false, we can immediately find out
4356       // which one.
4357       guarantee(_cm->out_of_regions(), "only way to reach here");
4358       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4359       guarantee(_task_queue->size() == 0, "only way to reach here");
4360       guarantee(!_cm->has_overflown(), "only way to reach here");
4361       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4362 
4363       if (_cm->verbose_low()) {
4364         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4365       }
4366     } else {
4367       // Apparently there's more work to do. Let's abort this task;
4368       // the caller will restart it and we can hopefully find more work.
4369 
4370       if (_cm->verbose_low()) {
4371         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4372                                _worker_id);
4373       }
4374 
4375       set_has_aborted();
4376       statsOnly( ++_aborted_termination );
4377     }
4378   }
4379 
4380   // Mainly for debugging purposes, to make sure that a pointer to the
4381   // closure which was allocated on this frame's stack doesn't escape
4382   // it by accident.
4383   set_cm_oop_closure(NULL);
4384   double end_time_ms = os::elapsedVTime() * 1000.0;
4385   double elapsed_time_ms = end_time_ms - _start_time_ms;
4386   // Update the step history.
4387   _step_times_ms.add(elapsed_time_ms);
4388 
4389   if (has_aborted()) {
4390     // The task was aborted for some reason.
4391 
4392     statsOnly( ++_aborted );
4393 
4394     if (_has_timed_out) {
4395       double diff_ms = elapsed_time_ms - _time_target_ms;
4396       // Keep statistics of how well we did with respect to hitting
4397       // our target only if we actually timed out (if we aborted for
4398       // other reasons, then the results might get skewed).
4399       _marking_step_diffs_ms.add(diff_ms);
4400     }
4401 
4402     if (_cm->has_overflown()) {
4403       // This is the interesting one. We aborted because a global
4404       // overflow was raised. This means we have to restart the
4405       // marking phase and start iterating over regions. However, in
4406       // order to do this we have to make sure that all tasks stop
4407       // what they are doing and re-initialise in a safe manner. We
4408       // will achieve this with the use of two barrier sync points.
4409 
4410       if (_cm->verbose_low()) {
4411         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4412       }
4413 
4414       if (!is_serial) {
4415         // We only need to enter the sync barrier if being called
4416         // from a parallel context
4417         _cm->enter_first_sync_barrier(_worker_id);
4418 
4419         // When we exit this sync barrier we know that all tasks have
4420         // stopped doing marking work. So, it's now safe to
4421         // re-initialise our data structures. At the end of this method,
4422         // task 0 will clear the global data structures.
4423       }
4424 
4425       statsOnly( ++_aborted_overflow );
4426 
4427       // We clear the local state of this task...
4428       clear_region_fields();
4429 
4430       if (!is_serial) {
4431         // ...and enter the second barrier.
4432         _cm->enter_second_sync_barrier(_worker_id);
4433       }
4434       // At this point, if we're in the concurrent phase of
4435       // marking, everything has been re-initialized and we're
4436       // ready to restart.
4437     }
4438 
4439     if (_cm->verbose_low()) {
4440       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4441                              "elapsed = %1.2lfms <<<<<<<<<<",
4442                              _worker_id, _time_target_ms, elapsed_time_ms);
4443       if (_cm->has_aborted()) {
4444         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4445                                _worker_id);
4446       }
4447     }
4448   } else {
4449     if (_cm->verbose_low()) {
4450       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4451                              "elapsed = %1.2lfms <<<<<<<<<<",
4452                              _worker_id, _time_target_ms, elapsed_time_ms);
4453     }
4454   }
4455 
4456   _claimed = false;
4457 }
4458 
4459 CMTask::CMTask(uint worker_id,
4460                ConcurrentMark* cm,
4461                size_t* marked_bytes,
4462                BitMap* card_bm,
4463                CMTaskQueue* task_queue,
4464                CMTaskQueueSet* task_queues)
4465   : _g1h(G1CollectedHeap::heap()),
4466     _worker_id(worker_id), _cm(cm),
4467     _claimed(false),
4468     _nextMarkBitMap(NULL), _hash_seed(17),
4469     _task_queue(task_queue),
4470     _task_queues(task_queues),
4471     _cm_oop_closure(NULL),
4472     _marked_bytes_array(marked_bytes),
4473     _card_bm(card_bm) {
4474   guarantee(task_queue != NULL, "invariant");
4475   guarantee(task_queues != NULL, "invariant");
4476 
4477   statsOnly( _clock_due_to_scanning = 0;
4478              _clock_due_to_marking  = 0 );
4479 
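     // Seed the prediction-error history with a small positive value,
     // presumably so that the first few marking steps get a non-zero,
     // slightly conservative adjustment to their time target.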
4480   _marking_step_diffs_ms.add(0.5);
4481 }
4482 
4483 // These are formatting macros that are used below to ensure
4484 // consistent formatting. The *_H_* versions are used to format the
4485 // header for a particular value and they should be kept consistent
4486 // with the corresponding macro. Also note that most of the macros add
4487 // the necessary white space (as a prefix) which makes them a bit
4488 // easier to compose.
4489 
4490 // All the output lines are prefixed with this string to be able to
4491 // identify them easily in a large log file.
4492 #define G1PPRL_LINE_PREFIX            "###"
4493 
4494 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4495 #ifdef _LP64
4496 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4497 #else // _LP64
4498 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4499 #endif // _LP64
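     // The header widths above are meant to match the body format: on LP64,
     // PTR_FORMAT prints as "0x" plus 16 hex digits (18 characters), so an
     // address range "ptr-ptr" needs 2*18 + 1 = 37 characters; on 32-bit the
     // corresponding arithmetic gives 2*10 + 1 = 21.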
4500 
4501 // For per-region info
4502 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4503 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4504 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4505 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4506 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4507 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4508 
4509 // For summary info
4510 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4511 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4512 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4513 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
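     // Purely illustrative (not actual output): with these macros a
     // per-region line comes out roughly as
     //   ###  OLD 0x00000000f0000000-0x00000000f0100000    1048576 ...
     // and a summary entry as, for example,
     //   capacity: 256.00 MB  used: 128.00 MB / 50.00 %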
4514 
4515 G1PrintRegionLivenessInfoClosure::
4516 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4517   : _out(out),
4518     _total_used_bytes(0), _total_capacity_bytes(0),
4519     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4520     _hum_used_bytes(0), _hum_capacity_bytes(0),
4521     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4522   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4523   MemRegion g1_committed = g1h->g1_committed();
4524   MemRegion g1_reserved = g1h->g1_reserved();
4525   double now = os::elapsedTime();
4526 
4527   // Print the header of the output.
4528   _out->cr();
4529   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4530   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4531                  G1PPRL_SUM_ADDR_FORMAT("committed")
4532                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4533                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4534                  g1_committed.start(), g1_committed.end(),
4535                  g1_reserved.start(), g1_reserved.end(),
4536                  HeapRegion::GrainBytes);
4537   _out->print_cr(G1PPRL_LINE_PREFIX);
4538   _out->print_cr(G1PPRL_LINE_PREFIX
4539                  G1PPRL_TYPE_H_FORMAT
4540                  G1PPRL_ADDR_BASE_H_FORMAT
4541                  G1PPRL_BYTE_H_FORMAT
4542                  G1PPRL_BYTE_H_FORMAT
4543                  G1PPRL_BYTE_H_FORMAT
4544                  G1PPRL_DOUBLE_H_FORMAT,
4545                  "type", "address-range",
4546                  "used", "prev-live", "next-live", "gc-eff");
4547   _out->print_cr(G1PPRL_LINE_PREFIX
4548                  G1PPRL_TYPE_H_FORMAT
4549                  G1PPRL_ADDR_BASE_H_FORMAT
4550                  G1PPRL_BYTE_H_FORMAT
4551                  G1PPRL_BYTE_H_FORMAT
4552                  G1PPRL_BYTE_H_FORMAT
4553                  G1PPRL_DOUBLE_H_FORMAT,
4554                  "", "",
4555                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4556 }
4557 
4558 // It takes a pointer to one of the _hum_* fields as a parameter. It
4559 // deduces the corresponding value for a region in a humongous region
4560 // series (either the region size, or what's left if the _hum_* field
4561 // is < the region size), and updates the _hum_* field accordingly.
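     // For example (illustrative numbers only): with a 1 MB region size and a
     // humongous series whose used bytes total 2.5 MB, successive calls on
     // _hum_used_bytes return 1 MB, 1 MB and 0.5 MB, leaving the field at 0.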
4562 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4563   size_t bytes = 0;
4564   // The > 0 check is to deal with the prev and next live bytes which
4565   // could be 0.
4566   if (*hum_bytes > 0) {
4567     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4568     *hum_bytes -= bytes;
4569   }
4570   return bytes;
4571 }
4572 
4573 // It deduces the values for a region in a humongous region series
4574 // from the _hum_* fields and updates those accordingly. It assumes
4575 // that the _hum_* fields have already been set up from the "starts
4576 // humongous" region and that we visit the regions in address order.
4577 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4578                                                      size_t* capacity_bytes,
4579                                                      size_t* prev_live_bytes,
4580                                                      size_t* next_live_bytes) {
4581   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4582   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4583   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4584   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4585   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4586 }
4587 
4588 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4589   const char* type = "";
4590   HeapWord* bottom       = r->bottom();
4591   HeapWord* end          = r->end();
4592   size_t capacity_bytes  = r->capacity();
4593   size_t used_bytes      = r->used();
4594   size_t prev_live_bytes = r->live_bytes();
4595   size_t next_live_bytes = r->next_live_bytes();
4596   double gc_eff          = r->gc_efficiency();
4597   if (r->used() == 0) {
4598     type = "FREE";
4599   } else if (r->is_survivor()) {
4600     type = "SURV";
4601   } else if (r->is_young()) {
4602     type = "EDEN";
4603   } else if (r->startsHumongous()) {
4604     type = "HUMS";
4605 
4606     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4607            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4608            "they should have been zeroed after the last time we used them");
4609     // Set up the _hum_* fields.
4610     _hum_capacity_bytes  = capacity_bytes;
4611     _hum_used_bytes      = used_bytes;
4612     _hum_prev_live_bytes = prev_live_bytes;
4613     _hum_next_live_bytes = next_live_bytes;
4614     get_hum_bytes(&used_bytes, &capacity_bytes,
4615                   &prev_live_bytes, &next_live_bytes);
4616     end = bottom + HeapRegion::GrainWords;
4617   } else if (r->continuesHumongous()) {
4618     type = "HUMC";
4619     get_hum_bytes(&used_bytes, &capacity_bytes,
4620                   &prev_live_bytes, &next_live_bytes);
4621     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4622   } else {
4623     type = "OLD";
4624   }
4625 
4626   _total_used_bytes      += used_bytes;
4627   _total_capacity_bytes  += capacity_bytes;
4628   _total_prev_live_bytes += prev_live_bytes;
4629   _total_next_live_bytes += next_live_bytes;
4630 
4631   // Print a line for this particular region.
4632   _out->print_cr(G1PPRL_LINE_PREFIX
4633                  G1PPRL_TYPE_FORMAT
4634                  G1PPRL_ADDR_BASE_FORMAT
4635                  G1PPRL_BYTE_FORMAT
4636                  G1PPRL_BYTE_FORMAT
4637                  G1PPRL_BYTE_FORMAT
4638                  G1PPRL_DOUBLE_FORMAT,
4639                  type, bottom, end,
4640                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4641 
4642   return false;
4643 }
4644 
4645 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4646   // Print the footer of the output.
4647   _out->print_cr(G1PPRL_LINE_PREFIX);
4648   _out->print_cr(G1PPRL_LINE_PREFIX
4649                  " SUMMARY"
4650                  G1PPRL_SUM_MB_FORMAT("capacity")
4651                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4652                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4653                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4654                  bytes_to_mb(_total_capacity_bytes),
4655                  bytes_to_mb(_total_used_bytes),
4656                  perc(_total_used_bytes, _total_capacity_bytes),
4657                  bytes_to_mb(_total_prev_live_bytes),
4658                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4659                  bytes_to_mb(_total_next_live_bytes),
4660                  perc(_total_next_live_bytes, _total_capacity_bytes));
4661   _out->cr();
4662 }