/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
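  // The bitmap tracks one bit per 2^_shifter heap words, so a marked bit
  // can only correspond to an address aligned at that granularity; rounding
  // up keeps us from reporting a "marked" address below addr. Illustrative
  // arithmetic (not from the original sources): if _shifter were 3 with
  // 8-byte heap words, addr would be rounded up to the next 64-byte
  // boundary before the bitmap is searched.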
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
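  // Sizing note (illustrative, derived from the shift arithmetic above):
  // the backing store needs one bit per 2^_shifter heap words, i.e.
  // _bmWordSize >> (_shifter + LogBitsPerByte) bytes. For example, if
  // _shifter is 0 that is one bit per heap word, so on a 64-bit VM with
  // 8-byte heap words each marking bitmap costs roughly 1/64th of the
  // heap it covers.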
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflowed the marking stack while marking.
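  // Expansion policy (summary of the code below): the capacity is doubled,
  // capped at MarkStackSizeMax, and the old backing store is only released
  // once the larger reservation has succeeded. If the new reservation
  // fails we keep the current stack and carry on, which is benign.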
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          (size_t) (_capacity / K), (size_t) (new_capacity / K));
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
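  // Unlike par_push() / par_adjoin_arr() above, this path is fully
  // serialized by ParGCRareEvent_lock, so a plain store to _index
  // (rather than a CAS retry loop) is sufficient here.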
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
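  // Claiming protocol (summary of the code below): the first, lock-free
  // read of _next_survivor is just a fast-path check; the authoritative
  // claim happens under RootRegionScan_lock, where the value is re-read
  // in case another worker claimed the region in the meantime.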
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(LogMinObjAlignment),
  _markBitMap2(LogMinObjAlignment),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
            "than ParallelGCThreads (" UINT32_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads     = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor                 = 0.0;
    _marking_task_overhead        = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor          = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
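      // Illustrative arithmetic (not from the original sources): with
      // scale_parallel_threads() defined above as MAX2((n + 2) / 4, 1U),
      // 8 ParallelGCThreads yield (8 + 2) / 4 == 2 marking threads, while
      // small values are clamped to a minimum of 1.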
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
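    // Note (summary of the branch below): the explicit range checks are
    // only applied when MarkStackSize was set on the command line; an
    // ergonomically chosen default is assumed to be consistent with
    // MarkStackSizeMax.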
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                       CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an evacuation
 * pause to occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
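  // Background (summary): root regions are currently the survivor regions
  // of the initial-mark pause. Their objects are treated as live marking
  // roots, so the scan has to complete before the next evacuation pause
  // is allowed to move them (see wait_until_scan_finished() above).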
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
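    // Rationale (assumption, not stated in the original sources): the
    // previous bitmap reflects the last completed marking, whereas the
    // next bitmap is only partially built at this point, since marking
    // is about to be restarted.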
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
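// Background on the two bitmaps used below (summary): the region bitmap
// has one bit per heap region and records which regions contain any live
// data; the card bitmap has one bit per card and records which cards span
// live objects. Both are filled in from the marking bitmap and compared
// against the counting data accumulated concurrently during marking.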
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
              "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit, in the region
// liveness bitmap, for each region containing live data.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
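      // A small worked example (assuming the usual 512-byte cards,
      // i.e. a card shift of 9): with ntams at byte offset 0 of the
      // covered space and top at byte offset 1280, start_idx is 0 and
      // end_idx is 2 (1280 >> 9 == 2). top is not card aligned, so
      // end_idx is bumped to 3 below and the half-open range
      // [start_idx, end_idx) covers cards 0, 1 and 2 - including the
      // partially filled last card.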
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     end_idx, _card_bm->size()));
      assert(start_idx < _card_bm->size(),
             err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     start_idx, _card_bm->size()));

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0) {
    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
              "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&final_update_cl);
    }
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  uint _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  OldRegionSet* _old_proxy_set;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             OldRegionSet* old_proxy_set,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _old_proxy_set(old_proxy_set),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->continuesHumongous()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
    _g1->reset_gc_time_stamps(hr);
    double start = os::elapsedTime();
    _regions_claimed++;
    hr->note_end_of_marking();
    _max_live_bytes += hr->max_live_bytes();
    _g1->free_region_if_empty(hr,
                              &_freed_bytes,
                              _local_cleanup_list,
                              _old_proxy_set,
                              _humongous_proxy_set,
                              _hrrs_cleanup_task,
                              true /* par */);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) {
      _max_region_time = region_time;
    }
    return false;
  }

  size_t max_live_bytes() { return _max_live_bytes; }
  uint regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(uint worker_id) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
                                           &old_proxy_set,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
                                            _g1h->workers()->active_workers(),
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &old_proxy_set,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we cannot guarantee that we only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.
      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm) { }

  void work(uint worker_id) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  HRSPhaseSetter x(HRSPhaseCleanup);
  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  uint n_workers;

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");

    g1h->set_par_threads();
    n_workers = g1h->n_par_threads();
    assert(g1h->n_par_threads() == n_workers,
           "Should not have been reset");
    g1h->workers()->run_task(&g1_par_count_task);
    // Done with the parallel phase so reset to 0.
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    n_workers = 1;
    g1_par_count_task.work(0);
  }

  if (VerifyDuringGC) {
    // Verify that the counting data accumulated during marking matches
    // that calculated by walking the marking bitmap.
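    // The verification recomputes an "expected" region bitmap and card
    // bitmap from the next marking bitmap (see CalcLiveObjectsClosure
    // and VerifyLiveObjectDataHRClosure above) and then checks that
    // every bit set in the expected bitmaps is also set in the actual
    // ones.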
    // Bitmaps to hold expected values
    BitMap expected_region_bm(_region_bm.size(), false);
    BitMap expected_card_bm(_card_bm.size(), false);

    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                 &_region_bm,
                                                 &_card_bm,
                                                 &expected_region_bm,
                                                 &expected_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_verify_task);
      // Done with the parallel phase so reset to 0.
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
             "sanity check");
    } else {
      g1_par_verify_task.work(0);
    }

    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  }

  size_t start_used_bytes = g1h->used();
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitmap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    g1h->set_par_threads((int)n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // Scrub the remembered sets (if enabled) before the
  // record_concurrent_mark_cleanup_end() call below, since scrubbing
  // affects the metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                            HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  if (G1Log::fine()) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
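  // (update_heap_info_at_gc() records the heap occupancy figures that
  // the soft reference clearing policy consults.)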
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(after)");
  }

  g1h->verify_region_sets_optional();
  g1h->trace_heap_after_concurrent_cycle();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has %u entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending %u entries to the secondary_free_list, "
                               "cleanup list still has %u entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference
// processing. Uses the CMTask associated with a worker thread (for serial
// reference processing the CMTask for worker 0 is used) to preserve
// (mark) and trace referent objects.
//
// Using the CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage.
// Also, using the tasks' local queues removes the potential for the
// workers to interfere with each other, which could occur if they
// operated on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop( oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->worker_id(), p, (void*) obj);
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call CMTask::do_marking_step() to
        // process these entries.
        //
        // We call CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the CMTask associated with a given worker thread (for serial
// reference processing the CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.
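// (The huge time target passed below effectively disables the
// time-based abort inside CMTask::do_marking_step(), so the do/while
// loop only terminates once the stacks are fully drained or the global
// mark stack overflows.)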
class G1CMDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
                               _task->worker_id(), BOOL_TO_STR(_is_serial));
      }

      // We call CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  WorkGang* _workers;
  int _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask& _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in CMTask::do_marking_step() know
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in CMTask::do_marking_step() know
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  if (has_overflown()) {
    // Skip processing the discovered references if we have
    // overflown the global marking stack. Reference objects
    // only get discovered once so it is OK to not
    // de-populate the discovered reference lists. We could have,
    // but the only benefit would be that, when marking restarts,
    // fewer reference objects are discovered.
    return;
  }

  ResourceMark rm;
  HandleMark hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    if (G1Log::finer()) {
      gclog_or_tty->put(' ');
    }
    GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Set the soft reference policy
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    // Instances of the 'Keep Alive' and 'Complete GC' closures used
    // in serial reference processing. Note these closures are also
    // used for serially processing (by the current thread) the
    // JNI references during parallel reference processing.
    //
    // These closures do not need to synchronize with the worker
    // threads involved in parallel reference processing as these
    // instances are executed serially by the current thread (e.g.
    // reference processing is not multi-threaded and is thus
    // performed by the current thread instead of a gang worker).
    //
    // The gang tasks involved in parallel reference processing create
    // their own instances of these closures, which do their own
    // synchronization among themselves.
    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);

    // We need at least one active thread. If reference processing
    // is not multi-threaded we use the current (VMThread) thread,
    // otherwise we use the work gang from the G1CollectedHeap and
    // we utilize all the worker threads we can.
    bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);

    // Parallel processing task executor.
    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);
    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);

    // Set the concurrency level. The phase was already set prior to
    // executing the remark task.
    set_concurrency(active_workers);

    // Set the degree of MT processing here. If the discovery was done MT,
    // the number of threads involved during discovery could differ from
    // the number of active workers. This is OK as long as the discovered
    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);

    // Process the weak references.
    const ReferenceProcessorStats& stats =
        rp->process_discovered_references(&g1_is_alive,
                                          &g1_keep_alive,
                                          &g1_drain_mark_stack,
                                          executor,
                                          g1h->gc_timer_cm());
    g1h->gc_tracer_cm()->report_gc_reference_stats(stats);

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.
    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");

    if (_markStack.overflow()) {
      // This should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    assert(rp->num_q() == active_workers, "why not");

    rp->enqueue_discovered_references(executor);

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  // Now clean up stale oops in StringTable
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in symbol table.
  SymbolTable::unlink();
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap = (CMBitMap*) temp;
}

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  bool _is_serial;
public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if (worker_id < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_termination */,
                              _is_serial);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
    AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    uint active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = (uint) ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_concurrency_and_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
    // value originally calculated in the ConcurrentMark
    // constructor and pass values of the active workers
    // through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    uint active_workers = 1;
    set_concurrency_and_phase(active_workers, false /* concurrent */);

    // Note - if there's no work gang then the VMThread will be
    // the thread to execute the remark - serially.
    // We have to pass true for the is_serial parameter so that
    // CMTask::do_marking_step() doesn't enter the sync
    // barriers in the event of an overflow. Doing so will
    // cause an assert that the current thread is not a
    // concurrent GC thread.
    CMRemarkTask remarkTask(this, active_workers, true /* is_serial */);
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            err_msg("Invariant: has_overflown = %s, num buffers = %d",
                    BOOL_TO_STR(has_overflown()),
                    satb_mq_set.completed_buffers_num()));

  print_stats();
}

#ifndef PRODUCT

class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream* _out;
  VerifyOption _vo;
  bool _all;

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption vo,
                           bool all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop( oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
      bool marked = _g1h->is_marked(obj, _vo);

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

class PrintReachableObjectClosure : public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream* _out;
  VerifyOption _vo;
  bool _all;
  HeapRegion* _hr;

public:
  PrintReachableObjectClosure(outputStream* out,
                              VerifyOption vo,
                              bool all,
                              HeapRegion* hr) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
    bool marked = _g1h->is_marked(o, _vo);
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ?
" M" : ""); 2698 PrintReachableOopClosure oopCl(_out, _vo, _all); 2699 o->oop_iterate_no_header(&oopCl); 2700 } 2701 } 2702 }; 2703 2704 class PrintReachableRegionClosure : public HeapRegionClosure { 2705 private: 2706 G1CollectedHeap* _g1h; 2707 outputStream* _out; 2708 VerifyOption _vo; 2709 bool _all; 2710 2711 public: 2712 bool doHeapRegion(HeapRegion* hr) { 2713 HeapWord* b = hr->bottom(); 2714 HeapWord* e = hr->end(); 2715 HeapWord* t = hr->top(); 2716 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2717 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2718 "TAMS: "PTR_FORMAT, b, e, t, p); 2719 _out->cr(); 2720 2721 HeapWord* from = b; 2722 HeapWord* to = t; 2723 2724 if (to > from) { 2725 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2726 _out->cr(); 2727 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2728 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2729 _out->cr(); 2730 } 2731 2732 return false; 2733 } 2734 2735 PrintReachableRegionClosure(outputStream* out, 2736 VerifyOption vo, 2737 bool all) : 2738 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2739 }; 2740 2741 void ConcurrentMark::print_reachable(const char* str, 2742 VerifyOption vo, 2743 bool all) { 2744 gclog_or_tty->cr(); 2745 gclog_or_tty->print_cr("== Doing heap dump... "); 2746 2747 if (G1PrintReachableBaseFile == NULL) { 2748 gclog_or_tty->print_cr(" #### error: no base file defined"); 2749 return; 2750 } 2751 2752 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2753 (JVM_MAXPATHLEN - 1)) { 2754 gclog_or_tty->print_cr(" #### error: file name too long"); 2755 return; 2756 } 2757 2758 char file_name[JVM_MAXPATHLEN]; 2759 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2760 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2761 2762 fileStream fout(file_name); 2763 if (!fout.is_open()) { 2764 gclog_or_tty->print_cr(" #### error: could not open file"); 2765 return; 2766 } 2767 2768 outputStream* out = &fout; 2769 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2770 out->cr(); 2771 2772 out->print_cr("--- ITERATING OVER REGIONS"); 2773 out->cr(); 2774 PrintReachableRegionClosure rcl(out, vo, all); 2775 _g1h->heap_region_iterate(&rcl); 2776 out->cr(); 2777 2778 gclog_or_tty->print_cr(" done"); 2779 gclog_or_tty->flush(); 2780 } 2781 2782 #endif // PRODUCT 2783 2784 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2785 // Note we are overriding the read-only view of the prev map here, via 2786 // the cast. 2787 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2788 } 2789 2790 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2791 _nextMarkBitMap->clearRange(mr); 2792 } 2793 2794 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2795 clearRangePrevBitmap(mr); 2796 clearRangeNextBitmap(mr); 2797 } 2798 2799 HeapRegion* 2800 ConcurrentMark::claim_region(uint worker_id) { 2801 // "checkpoint" the finger 2802 HeapWord* finger = _finger; 2803 2804 // _heap_end will not change underneath our feet; it only changes at 2805 // yield points. 2806 while (finger < _heap_end) { 2807 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2808 2809 // Note on how this code handles humongous regions. In the 2810 // normal case the finger will reach the start of a "starts 2811 // humongous" (SH) region. 
    // Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord* bottom = curr_region->bottom();
    HeapWord* end = curr_region->end();
    HeapWord* limit = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             worker_id, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] we were successful with region = "
                               PTR_FORMAT, worker_id, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
                                 "returning it ", worker_id, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
                                 "returning NULL", worker_id, curr_region);
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               worker_id, _finger, finger);
      }

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

#ifndef PRODUCT
enum VerifyNoCSetOopsPhase {
  VerifyNoCSetOopsStack,
  VerifyNoCSetOopsQueues,
  VerifyNoCSetOopsSATBCompleted,
  VerifyNoCSetOopsSATBThread
};

class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  VerifyNoCSetOopsPhase _phase;
  int _info;

  const char* phase_str() {
    switch (_phase) {
    case VerifyNoCSetOopsStack:         return "Stack";
    case VerifyNoCSetOopsQueues:        return "Queue";
    case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
    case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
    default:                            ShouldNotReachHere();
    }
    return NULL;
  }

  void do_object_work(oop obj) {
    guarantee(!_g1h->obj_in_cs(obj),
              err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
                      (void*) obj, phase_str(), _info));
  }

public:
  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }

  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
    _phase = phase;
    _info = info;
  }

  virtual void do_oop(oop* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    do_object_work(obj);
  }

  virtual void do_oop(narrowOop* p) {
    // We should not come across narrow oops while scanning marking
    // stacks and SATB buffers.
    ShouldNotReachHere();
  }

  virtual void do_object(oop obj) {
    do_object_work(obj);
  }
};

void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
                                         bool verify_enqueued_buffers,
                                         bool verify_thread_buffers,
                                         bool verify_fingers) {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!G1CollectedHeap::heap()->mark_in_progress()) {
    return;
  }

  VerifyNoCSetOopsClosure cl;

  if (verify_stacks) {
    // Verify entries on the global mark stack
    cl.set_phase(VerifyNoCSetOopsStack);
    _markStack.oops_do(&cl);

    // Verify entries on the task queues
    for (uint i = 0; i < _max_worker_id; i += 1) {
      cl.set_phase(VerifyNoCSetOopsQueues, i);
      CMTaskQueue* queue = _task_queues->queue(i);
      queue->oops_do(&cl);
    }
  }

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();

  // Verify entries on the enqueued SATB buffers
  if (verify_enqueued_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
    satb_qs.iterate_completed_buffers_read_only(&cl);
  }

  // Verify entries on the per-thread SATB buffers
  if (verify_thread_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBThread);
    satb_qs.iterate_thread_buffers_read_only(&cl);
  }

  if (verify_fingers) {
    // Verify the global finger
    HeapWord* global_finger = finger();
    if (global_finger != NULL && global_finger < _heap_end) {
      // The global finger always points to a heap region boundary.
      // We use heap_region_containing_raw() to get the containing
      // region given that the global finger could be pointing to a
      // free region which subsequently becomes continues humongous.
      // If that happens, heap_region_containing() will return the
      // bottom of the corresponding starts humongous region and the
      // check below will not hold any more.
      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
      guarantee(global_finger == global_hr->bottom(),
                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
                        global_finger, HR_FORMAT_PARAMS(global_hr)));
    }

    // Verify the task fingers
    assert(parallel_marking_threads() <= _max_worker_id, "sanity");
    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
      CMTask* task = _tasks[i];
      HeapWord* task_finger = task->finger();
      if (task_finger != NULL && task_finger < _heap_end) {
        // See above note on the global finger verification.
        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
        guarantee(task_finger == task_hr->bottom() ||
                  !task_hr->in_collection_set(),
                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
                          task_finger, HR_FORMAT_PARAMS(task_hr)));
      }
    }
  }
}
#endif // PRODUCT

// Aggregate the counting data that was constructed concurrently
// with marking.
class AggregateCountDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;
  BitMap* _cm_card_bm;
  uint _max_worker_id;

public:
  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
                              BitMap* cm_card_bm,
                              uint max_worker_id) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed.
      // Note that we cannot rely on their associated
      // "starts humongous" region to have their bit set to 1
      // since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* start = hr->bottom();
    HeapWord* limit = hr->next_top_at_mark_start();
    HeapWord* end = hr->end();

    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
                   "top: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, limit, hr->top(), hr->end()));

    assert(hr->next_marked_bytes() == 0, "Precondition");

    if (start == limit) {
      // NTAMS of this region has not been set so nothing to do.
      return false;
    }

    // 'start' should be in the heap.
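    // (Region boundaries are card aligned: a region's bottom() is
    // aligned to the heap region size, which is a multiple of the card
    // size - hence the is_card_aligned(start) part of the assert below.)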
    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump the card bitmap index
    // for limit so that we get all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could be actually just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
    uint hrs_index = hr->hrs_index();
    size_t marked_bytes = 0;

    for (uint i = 0; i < _max_worker_id; i += 1) {
      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

      // Fetch the marked_bytes in this region for task i and
      // add it to the running total for this region.
      marked_bytes += marked_bytes_array[hrs_index];

      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
      // into the global card bitmap.
      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);

      while (scan_idx < limit_idx) {
        assert(task_card_bm->at(scan_idx) == true, "should be");
        _cm_card_bm->set_bit(scan_idx);
        assert(_cm_card_bm->at(scan_idx) == true, "should be");

        // BitMap::get_next_one_offset() can handle the case when
        // its left_offset parameter is greater than its right_offset
        // parameter. It does, however, have an early exit if
        // left_offset == right_offset. So let's limit the value
        // passed in for left offset here.
        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
      }
    }

    // Update the marked bytes for this region.
    hr->add_to_marked_bytes(marked_bytes);

    // Next heap region
    return false;
  }
};

class G1AggregateCountDataTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _cm_card_bm;
  uint _max_worker_id;
  int _active_workers;

public:
  G1AggregateCountDataTask(G1CollectedHeap* g1h,
                           ConcurrentMark* cm,
                           BitMap* cm_card_bm,
                           uint max_worker_id,
                           int n_workers) :
    AbstractGangTask("Count Aggregation"),
    _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
    _max_worker_id(max_worker_id),
    _active_workers(n_workers) { }

  void work(uint worker_id) {
    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
                                            _active_workers,
                                            HeapRegion::AggregateCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&cl);
    }
  }
};


void ConcurrentMark::aggregate_count_data() {
  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                        _g1h->workers()->active_workers() :
                        1);

  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
                                           _max_worker_id, n_workers);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");
    _g1h->set_par_threads(n_workers);
    _g1h->workers()->run_task(&g1_par_agg_task);
    _g1h->set_par_threads(0);

    assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
           "sanity check");
    _g1h->reset_heap_region_claim_values();
  } else {
    g1_par_agg_task.work(0);
  }
}

// Clear the per-worker arrays used to store the per-region counting data
void ConcurrentMark::clear_all_count_data() {
  // Clear the global card bitmap - it will be filled during
  // liveness count aggregation (during remark) and the
  // final counting task.
  _card_bm.clear();

  // Clear the global region bitmap - it will be filled as part
  // of the final counting task.
  _region_bm.clear();

  uint max_regions = _g1h->max_regions();
  assert(_max_worker_id > 0, "uninitialized");

  for (uint i = 0; i < _max_worker_id; i += 1) {
    BitMap* task_card_bm = count_card_bitmap_for(i);
    size_t* marked_bytes_array = count_marked_bytes_array_for(i);

    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
    assert(marked_bytes_array != NULL, "uninitialized");

    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
    task_card_bm->clear();
  }
}

void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Clear the liveness counting data
  clear_all_count_data();
  // Empty mark stack
  reset_marking_state();
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);

  _g1h->trace_heap_after_concurrent_cycle();
  _g1h->register_concurrent_cycle_end();
}

static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ?
_total_rs_scrub_time * 1000.0 / 3273 (double)_cleanup_times.num() 3274 : 0.0)); 3275 } 3276 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3277 (_init_times.sum() + _remark_times.sum() + 3278 _cleanup_times.sum())/1000.0); 3279 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3280 "(%8.2f s marking).", 3281 cmThread()->vtime_accum(), 3282 cmThread()->vtime_mark_accum()); 3283 } 3284 3285 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3286 if (use_parallel_marking_threads()) { 3287 _parallel_workers->print_worker_threads_on(st); 3288 } 3289 } 3290 3291 void ConcurrentMark::print_on_error(outputStream* st) const { 3292 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3293 _prevMarkBitMap, _nextMarkBitMap); 3294 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3295 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3296 } 3297 3298 // We take a break if someone is trying to stop the world. 3299 bool ConcurrentMark::do_yield_check(uint worker_id) { 3300 if (should_yield()) { 3301 if (worker_id == 0) { 3302 _g1h->g1_policy()->record_concurrent_pause(); 3303 } 3304 cmThread()->yield(); 3305 return true; 3306 } else { 3307 return false; 3308 } 3309 } 3310 3311 bool ConcurrentMark::should_yield() { 3312 return cmThread()->should_yield(); 3313 } 3314 3315 bool ConcurrentMark::containing_card_is_marked(void* p) { 3316 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3317 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3318 } 3319 3320 bool ConcurrentMark::containing_cards_are_marked(void* start, 3321 void* last) { 3322 return containing_card_is_marked(start) && 3323 containing_card_is_marked(last); 3324 } 3325 3326 #ifndef PRODUCT 3327 // for debugging purposes 3328 void ConcurrentMark::print_finger() { 3329 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3330 _heap_start, _heap_end, _finger); 3331 for (uint i = 0; i < _max_worker_id; ++i) { 3332 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger()); 3333 } 3334 gclog_or_tty->print_cr(""); 3335 } 3336 #endif 3337 3338 void CMTask::scan_object(oop obj) { 3339 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3340 3341 if (_cm->verbose_high()) { 3342 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3343 _worker_id, (void*) obj); 3344 } 3345 3346 size_t obj_size = obj->size(); 3347 _words_scanned += obj_size; 3348 3349 obj->oop_iterate(_cm_oop_closure); 3350 statsOnly( ++_objs_scanned ); 3351 check_limits(); 3352 } 3353 3354 // Closure for iteration over bitmaps 3355 class CMBitMapClosure : public BitMapClosure { 3356 private: 3357 // the bitmap that is being iterated over 3358 CMBitMap* _nextMarkBitMap; 3359 ConcurrentMark* _cm; 3360 CMTask* _task; 3361 3362 public: 3363 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3364 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3365 3366 bool do_bit(size_t offset) { 3367 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3368 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3369 assert( addr < _cm->finger(), "invariant"); 3370 3371 statsOnly( _task->increase_objs_found_on_bitmap() ); 3372 assert(addr >= _task->finger(), "invariant"); 3373 3374 // We move that task's local finger along. 
    _task->move_finger_to(addr);

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};

// Closure for iterating over objects, currently only used for
// processing SATB buffers.
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}

void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
         "claim_region() should have filtered out NULL regions");
  assert(!hr->continuesHumongous(),
         "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
                           _worker_id, hr);
  }

  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _worker_id, bottom, limit);
    }
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and in fact we do not need to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
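    // Concretely (an illustrative example, not from the original
    // comments): suppose we cached _region_limit == bottom + 2K words,
    // an evacuation pause then empties the region (NTAMS == bottom),
    // the mutator allocates 1K words, and the region becomes a GC
    // alloc region (NTAMS == top == bottom + 1K). Re-reading the limit
    // now yields bottom + 1K, which is below the cached _region_limit,
    // i.e. exactly the "limit < _region_limit" case handled here.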
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
                           _worker_id, _curr_region);
  }
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Set these three fields to values that indicate that we're not
  // holding on to a region.
  _curr_region = NULL;
  _finger = NULL;
  _region_limit = NULL;
}

void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] resetting", _worker_id);
  }

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _local_pushes = 0;
  _local_pops = 0;
  _local_max_size = 0;
  _objs_scanned = 0;
  _global_pushes = 0;
  _global_pops = 0;
  _global_max_size = 0;
  _global_transfers_to = 0;
  _global_transfers_from = 0;
  _regions_claimed = 0;
  _objs_found_on_bitmap = 0;
  _satb_buffers_processed = 0;
  _steal_attempts = 0;
  _steals = 0;
  _aborted = 0;
  _aborted_overflow = 0;
  _aborted_cm_aborted = 0;
  _aborted_yield = 0;
  _aborted_timed_out = 0;
  _aborted_satb = 0;
  _aborted_termination = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}

void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following:

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for a Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
                           "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
                           _worker_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
                             _worker_id);
    }
    // we do need to process SATB buffers, so we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit = _real_words_scanned_limit;

  _real_refs_reached_limit = _refs_reached + refs_reached_period;
  _refs_reached_limit = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per-byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.
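  //
  // A worked example (derived purely from the formulas in
  // recalculate_limits() above and the assignments below, no extra
  // semantics): recalculate_limits() sets
  //   _words_scanned_limit = _words_scanned + words_scanned_period
  // and decrease_limits() pulls that back by 3 * words_scanned_period / 4,
  // so only words_scanned_period / 4 words of scanning headroom remain
  // before reached_limit() triggers the next regular_clock_call().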

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
                               _worker_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
                               _worker_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
                             _worker_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
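  // For instance (illustrative numbers only, not values asserted by
  // this file): with a task queue whose max_elems() is 16*1024 and
  // GCDrainStackTargetSize at 64, a partial drain stops once the queue
  // is down to MIN2((size_t)16*1024/3, 64) = 64 entries, while a total
  // drain (partially == false) uses a target of 0 and empties it.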
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] draining local queue, target size = "SIZE_FORMAT,
                             _worker_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
                             _worker_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] draining global stack, target size "SIZE_FORMAT,
                             _worker_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
                             _worker_id, _cm->mark_stack_size());
    }
  }
}

// The SATB queue set makes several assumptions about whether to call
// the par or non-par versions of its methods; this is why some of the
// code is replicated. We should really get rid of the single-threaded
// version of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_worker_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_worker_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_worker_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // Again, this was a potentially expensive operation, so decrease the
  // limits to get the regular clock call early.
  decrease_limits();
}

void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
                         _worker_id, _calls);
  gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
  gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

    (1) Marking Bitmap. If there are gray objects that appear only
    on the bitmap (this happens either when dealing with an overflow
    or when the initial marking phase has simply marked the roots
    and didn't push them on the stack), then tasks claim heap
    regions whose bitmap they then scan to find gray objects. A
    global finger indicates where the end of the last claimed region
    is. A local finger indicates how far into the region a task has
    scanned. The two fingers are used to determine how to gray an
    object (i.e. whether simply marking it is OK, as it will be
    visited by a task in the future, or whether it also needs to be
    pushed on a stack).

    (2) Local Queue. The local queue of the task which is accessed
    reasonably efficiently by the task. Other tasks can steal from
    it when they run out of work. Throughout the marking phase, a
    task attempts to keep its local queue short but not totally
    empty, so that entries are available for stealing by other
    tasks. Only when there is no more work will a task totally
    drain its local queue.

    (3) Global Mark Stack. This handles local queue overflow. During
    marking only sets of entries are moved between it and the local
    queues, as access to it requires a mutex, and finer-grained
    interaction with it might cause contention. If it overflows,
    then the marking phase should restart and iterate over the
    bitmap to identify gray objects. Throughout the marking phase,
    tasks attempt to keep the global mark stack at a small length
    but not totally empty, so that entries are available for popping
    by other tasks. Only when there is no more work will tasks
    totally drain the global mark stack.

    (4) SATB Buffer Queue. This is where completed SATB buffers are
    made available. Buffers are regularly removed from this queue
    and scanned for roots, so that the queue doesn't get too
    long. During remark, all completed buffers are processed, as
    well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

    (1) When the marking phase has been aborted (after a Full GC).

    (2) When a global overflow (on the global stack) has been
    triggered. Before the task aborts, it will actually sync up with
    the other tasks to ensure that all the marking data structures
    (local queues, stacks, fingers etc.) are re-initialized so that
    when do_marking_step() completes, the marking phase can
    immediately restart.

    (3) When enough completed SATB buffers are available. The
    do_marking_step() method only tries to drain SATB buffers right
    at the beginning. So, if enough buffers are available, the
    marking step aborts and the SATB buffers are processed at
    the beginning of the next invocation.

    (4) To yield. When we have to yield, we abort and do the yield
    right at the end of do_marking_step(). This saves us a lot of
    hassle: by yielding we might allow a Full GC; if that happens,
    objects will be compacted underneath our feet, the heap might
    shrink, etc. We avoid having to check for all this throughout
    the code by simply aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reaches a given limit. Additional invocations of
    the clock method have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
    vtime too regularly, as it is quite expensive. So, once it was in
    place, it was natural to piggy-back all the other conditions on it
    too and not constantly check them throughout the code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _worker_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // on the stack.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
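      // For example (illustrative): if a previous invocation aborted
      // after scanning up to some address A within [bottom, NTAMS),
      // then _finger == A now and mr below becomes [A, _region_limit),
      // so the already-scanned prefix of the region is not rescanned.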
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "HR_FORMAT,
                               _worker_id, _finger, _region_limit,
                               HR_FORMAT_PARAMS(_curr_region));
      }

      assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region == NULL, "invariant");
      assert(_finger == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%u] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _worker_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
                                 _worker_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task; it
      // will be restarted and hopefully we can then find more things
      // to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] apparently there is more work to do",
                               _worker_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was allocated on the stack in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
      }

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialise our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
                               _worker_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value, and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix), which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
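//
// For example (an illustrative composition, mirroring the print_cr()
// calls further down): adjacent string literals are concatenated by
// the compiler, so the macros defined below simply splice together
// into a single format string:
//
//   _out->print_cr(G1PPRL_LINE_PREFIX
//                  G1PPRL_TYPE_FORMAT     // " %-4s"
//                  G1PPRL_BYTE_FORMAT,    // " "SIZE_FORMAT_W(9)
//                  type, used_bytes);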
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT     " %-4s"
#define G1PPRL_TYPE_H_FORMAT   " %4s"
#define G1PPRL_BYTE_FORMAT     " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT   " %9s"
#define G1PPRL_DOUBLE_FORMAT   " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff", "remset");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", "(bytes)");
}

// Takes a reference to one of the _hum_* fields, deduces the
// corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// Deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
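//
// For example (illustrative numbers): with GrainBytes == 1M and
// _hum_used_bytes == 2.5M for a humongous series spanning three
// regions, the three successive get_hum_bytes(&_hum_used_bytes) calls
// made while visiting those regions return 1M, 1M and 0.5M, leaving
// the field at 0 for the next series.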
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = "";
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff = r->gc_efficiency();
  size_t remset_bytes = r->rem_set()->mem_size();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes = capacity_bytes;
    _hum_used_bytes = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, remset_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usages to remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes));
  _out->cr();
}
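
// Typical usage of this closure (a sketch, based on how the cleanup
// code elsewhere in this file drives it; the phase name below is
// illustrative):
//
//   if (G1PrintRegionLivenessInfo) {
//     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//     _g1h->heap_region_iterate(&cl);
//   }
//
// The constructor prints the table header, doHeapRegion() prints one
// line per region as the heap is iterated, and the destructor prints
// the summary footer.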